diff --git a/build.gradle b/build.gradle index 647439e202b..aeff6274d98 100755 --- a/build.gradle +++ b/build.gradle @@ -30,6 +30,7 @@ plugins { id 'com.bmuschko.docker-java-application' version '9.4.0' id "cz.alenkacz.gradle.scalafmt" version "1.16.2" id "java-library" + id 'scala' } apply plugin: 'java' @@ -44,7 +45,8 @@ apply plugin: 'ManifestClasspath' apply plugin: 'scalafmt' group = 'beam' -version = '1.0.beta' +version = '1.0.beta.1' +version = '0.9.13-beta-v6' description = """""" @@ -145,13 +147,20 @@ configurations.all { // we need this in order to get Elki library working fine. force 'net.jafama:jafama:2.3.2' // we need this in order to get Elki library working fine. - force 'it.unimi.dsi:fastutil:8.5.4' + force 'it.unimi.dsi:fastutil:8.5.12' force 'org.apache.logging.log4j:log4j-core:2.15.0' force 'org.apache.logging.log4j:log4j-api:2.15.0' + force 'org.apache.parquet:parquet-avro:1.12.3' + force 'org.apache.parquet:parquet-hadoop:1.12.3' + force 'org.apache.parquet:parquet-common:1.12.3' + force 'org.apache.parquet:parquet-column:1.12.3' + force 'org.apache.parquet:parquet-encoding:1.12.3' } exclude group: "log4j", module: "log4j" exclude group: "org.matsim", module: "matsim" + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.slf4j', module: 'slf4j-reload4j' } dependencies { @@ -167,6 +176,13 @@ dependencies { implementation group: 'com.google.inject.extensions', name: 'guice-multibindings', version: '4.1.0' implementation group: 'org.apache.commons', name: 'commons-collections4', version: '4.4' implementation group: 'org.apache.commons', name: 'commons-math3', version: '3.6.1' + implementation 'org.apache.commons:commons-lang3:3.12.0' + implementation 'commons-io:commons-io:2.11.0' + implementation 'org.apache.commons:commons-csv:1.8' + implementation 'org.apache.commons:commons-text:1.10.0' + implementation 'commons-lang:commons-lang:2.6' + implementation 'org.glassfish:javax.el:3.0.0' + implementation 
'javax.el:javax.el-api:3.0.0' implementation group: 'org.apache.httpcomponents', name: 'fluent-hc', version: '4.5.13' // Apache 2.0 @@ -208,7 +224,7 @@ dependencies { implementation(group: 'org.matsim.contrib', name: 'decongestion', version: '12.0') - implementation(group: 'com.github.LBNL-UCB-STI.matsim', name: 'matsim', version: '12.0-beam-8') { + implementation(group: 'com.github.LBNL-UCB-STI.matsim', name: 'matsim', version: '12.0-beam-9') { exclude group: 'log4j', module: 'log4j' } @@ -229,6 +245,8 @@ dependencies { implementation("com.github.LBNL-UCB-STI:jsprit-wrapper:v0.5.1") implementation("com.github.LBNL-UCB-STI:omx-java:v2.0.2") + implementation group: 'dev.zarr', name: 'jzarr', version: '0.4.2' + testImplementation group: 'junit', name: 'junit', version: '4.8' testImplementation group: 'org.mockito', name: 'mockito-inline', version: '2.27.0' testImplementation group: "org.mockito", name: "mockito-core", version: "2.28.2" @@ -281,12 +299,12 @@ dependencies { // https://mvnrepository.com/artifact/io.circe/circe-parser_2.12 implementation group: 'io.circe', name: "circe-parser_${scalaBinaryVersion}", version: circeBinaryVersion - implementation group: 'com.typesafe.play', name: "play-json_${scalaBinaryVersion}", version: '2.6.3' + implementation group: 'com.typesafe.play', name: "play-json_${scalaBinaryVersion}", version: '2.8.1' implementation(group: 'com.github.romix.akka', name: "akka-kryo-serialization_${scalaBinaryVersion}", version: '0.5.2') { exclude group: 'com.esotericsoftware', module: 'kryo' } - implementation group: 'com.esotericsoftware', name: 'kryo', version: '4.0.2' + implementation group: 'com.esotericsoftware', name: 'kryo', version: '4.0.3' implementation "com.github.vagmcs:optimus_${scalaBinaryVersion}:3.1.0" implementation "com.github.vagmcs:optimus-solver-oj_${scalaBinaryVersion}:3.1.0" @@ -328,16 +346,19 @@ dependencies { implementation group: 'com.zaxxer', name: 'nuprocess', version: '1.2.4' def parquet = "1.12.3" - implementation 
group: 'org.apache.parquet', name: 'parquet-hadoop', version: parquet - implementation group: 'org.apache.parquet', name: 'parquet-avro', version: parquet - implementation(group: 'org.apache.hadoop', name: 'hadoop-client', version: '2.7.3') { - exclude group: 'org.slf4j', module: 'slf4j-log4j12' + // Parquet dependencies with shading + implementation 'org.apache.parquet:parquet-avro:1.12.3' + implementation 'org.apache.parquet:parquet-hadoop:1.12.3' + implementation 'org.apache.hadoop:hadoop-common:3.3.4' + implementation 'org.apache.hadoop:hadoop-mapreduce-client-core:3.3.4' + implementation('org.apache.hadoop:hadoop-client:3.3.6') { // Exclude `ASM` because it is binary incompatible with the one which is gotten from `com.conveyal:kryo-tools`: `org.ow2.asm:asm:5.0.4` exclude group: 'asm', module: 'asm' // Exclude jsp-api because it contains an older version of javax.el (Expression Language) which is incompatible // with one that Hibernate-validator uses exclude group: 'javax.servlet.jsp', module: 'jsp-api' } + implementation 'it.unimi.dsi:fastutil:8.5.12' implementation(group: 'com.lihaoyi', name: "sourcecode_${scalaBinaryVersion}", version: '0.1.9') @@ -792,6 +813,9 @@ tasks.register('createDockerfile', Dockerfile) { addFile 'entrypoint.sh', 'entrypoint.sh' runCommand('chmod +x /app/entrypoint.sh') entryPoint('/app/entrypoint.sh') + + mainClassName = 'beam.sim.RunBeam' // Replace with the desired main class + } ext.getDockerTag = { -> @@ -816,17 +840,30 @@ ext.getDockerRepository = { -> } } -tasks.register('buildImageWithoutTags', DockerBuildImage) { dependsOn createDockerfile } +tasks.register('buildImageWithoutTags', DockerBuildImage) { + dependsOn createDockerfile + platform = 'linux/amd64' +} // one can build a docker image with command ./gradlew -Ptag=beammodel/beam:0.9.12 buildImage tasks.register('buildImage', DockerTagImage) { description 'Builds the Docker BEAM image, all code and test data are included into the image.' 
- group 'Docker' + group 'Docker ' dependsOn buildImageWithoutTags + // Ensure platform compatibility + inputs.property("platform", "linux/amd64") + tag = getDockerTag() repository = getDockerRepository() println("The new image will be tagged '$getDockerRepository:$getDockerTag'. To change use 'tag' project parameter, i.e. '-Ptag='") targetImageId buildImageWithoutTags.getImageId() -} + + // Force OCI format compatibility + doFirst { + if (!project.hasProperty('tag')) { + throw new GradleException("Please specify a tag using -Ptag=repository:version") + } + } +} \ No newline at end of file diff --git a/gcp/src/main/python/createImage/requirements.txt b/gcp/src/main/python/createImage/requirements.txt index d491d51f59c..65aaf1a19ea 100644 --- a/gcp/src/main/python/createImage/requirements.txt +++ b/gcp/src/main/python/createImage/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c -size 32 +google-api-python-client==2.47.0 \ No newline at end of file diff --git a/gcp/src/main/python/createSnapshot/requirements.txt b/gcp/src/main/python/createSnapshot/requirements.txt index d491d51f59c..65aaf1a19ea 100644 --- a/gcp/src/main/python/createSnapshot/requirements.txt +++ b/gcp/src/main/python/createSnapshot/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c -size 32 +google-api-python-client==2.47.0 \ No newline at end of file diff --git a/gcp/src/main/python/delete-snapshot/requirements.txt b/gcp/src/main/python/delete-snapshot/requirements.txt index d491d51f59c..65aaf1a19ea 100644 --- a/gcp/src/main/python/delete-snapshot/requirements.txt +++ b/gcp/src/main/python/delete-snapshot/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c -size 32 +google-api-python-client==2.47.0 \ 
No newline at end of file diff --git a/gcp/src/main/python/deploy_beam/requirements.txt b/gcp/src/main/python/deploy_beam/requirements.txt index d491d51f59c..65aaf1a19ea 100644 --- a/gcp/src/main/python/deploy_beam/requirements.txt +++ b/gcp/src/main/python/deploy_beam/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c -size 32 +google-api-python-client==2.47.0 \ No newline at end of file diff --git a/gcp/src/main/python/notify_idle/requirements.txt b/gcp/src/main/python/notify_idle/requirements.txt index eae558c6268..1c152028ad8 100644 --- a/gcp/src/main/python/notify_idle/requirements.txt +++ b/gcp/src/main/python/notify_idle/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83ebe9988d6a14378d99baa36d3e5d8dff875e4b543dd4227bd83f426a6631c7 -size 33 +google-api-python-client==2.47.0 diff --git a/gcp/src/main/python/update-base-image/requirements.txt b/gcp/src/main/python/update-base-image/requirements.txt index 1eab71a063e..cb6b517d4a2 100644 --- a/gcp/src/main/python/update-base-image/requirements.txt +++ b/gcp/src/main/python/update-base-image/requirements.txt @@ -1,3 +1,2 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0362ceda56a09f8fe3c951e26ba0df00be99de622721a15b8b23c98c61073658 -size 57 +functions-framework==3.* +google-api-python-client==2.47.0 \ No newline at end of file diff --git a/gcp/src/main/python/updateEnvVarsForProvidedFunctionNames/requirements.txt b/gcp/src/main/python/updateEnvVarsForProvidedFunctionNames/requirements.txt index d491d51f59c..65aaf1a19ea 100644 --- a/gcp/src/main/python/updateEnvVarsForProvidedFunctionNames/requirements.txt +++ b/gcp/src/main/python/updateEnvVarsForProvidedFunctionNames/requirements.txt @@ -1,3 +1 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c868ce84345515e4900fe7e800b6c1e2e5560da5101641c9f1afcd2f81a092c -size 32 +google-api-python-client==2.47.0 \ No 
newline at end of file diff --git a/production/seattle b/production/seattle index e00e15fe5d9..0e24a673b93 160000 --- a/production/seattle +++ b/production/seattle @@ -1 +1 @@ -Subproject commit e00e15fe5d9cb9f9b57e695201abb0fe334b50e0 +Subproject commit 0e24a673b93b5998d06735fde2edabf07bea3693 diff --git a/production/sfbay b/production/sfbay index e8f7b0da386..6825637d1c8 160000 --- a/production/sfbay +++ b/production/sfbay @@ -1 +1 @@ -Subproject commit e8f7b0da3868f6fab98fb9a6a3775c15501b04af +Subproject commit 6825637d1c8cc2907bda10364746841b306942eb diff --git a/src/main/R/freight/freight-processing.R b/src/main/R/freight/freight-processing.R index 761d19b1869..5f69d092312 100644 --- a/src/main/R/freight/freight-processing.R +++ b/src/main/R/freight/freight-processing.R @@ -30,40 +30,46 @@ isCav <- function(x) { return(x >= 4) } +### Test +# work_folder <- normalizePath("~/Workspace/Data/Scenarios/sfbay/validation_data/BEAM") +# speed <- readCsv(pp(work_folder, "/sfbay_residential_simpl_network.csv.gz")) +# ggplot(speed, aes(x=speed_beam-speed_npmrds)) + geom_histogram() + xlim(0, 100) + + # emission -emfac_sf_file <- normalizePath('~/Workspace/Models/emfac/2018/SF_2018_Annual_fleet_data_population_20240311153419.csv') -emfac_sf <- readCsv(emfac_sf_file) -emfac_sf_normalized <- emfac_sf[,.(sum_population=(sum(population))),by=.(vehicle_class, fuel)] -emfac_sf_normalized$share_population <- emfac_sf_normalized$sum_population/sum(emfac_sf_normalized$sum_population) +# emfac_sf_file <- normalizePath('~/Workspace/Models/emfac/2018/SF_2018_Annual_fleet_data_population_20240311153419.csv') +# emfac_sf <- readCsv(emfac_sf_file) +# emfac_sf_normalized <- emfac_sf[,.(sum_population=(sum(population))),by=.(vehicle_class, fuel)] +# emfac_sf_normalized$share_population <- emfac_sf_normalized$sum_population/sum(emfac_sf_normalized$sum_population) ### # Create a sample data frame -data <- data.frame( - Name = c("John", "Jane", "Bob", "Alice", "John", "Jane", "Bob", 
"Alice"), - Age = c(25, 30, 35, 40, 27, 32, 38, 42), - City = c("New York", "London", "Paris", "Tokyo", "New York", "London", "Paris", "Tokyo"), - Gender = c("M", "F", "M", "F", "M", "F", "M", "F") -) +# data <- data.frame( +# Name = c("John", "Jane", "Bob", "Alice", "John", "Jane", "Bob", "Alice"), +# Age = c(25, 30, 35, 40, 27, 32, 38, 42), +# City = c("New York", "London", "Paris", "Tokyo", "New York", "London", "Paris", "Tokyo"), +# Gender = c("M", "F", "M", "F", "M", "F", "M", "F") +# ) # Load the required packages -library(dplyr) -library(stringr) +# library(dplyr) +# library(stringr) # Group the data frame by 'City' and 'Gender', and concatenate 'Name' into a new column -grouped <- data %>% - group_by(City, Gender) %>% - summarise(Names = str_c(Name, collapse = ", ")) - -print(grouped) +# grouped <- data %>% +# group_by(City, Gender) %>% +# summarise(Names = str_c(Name, collapse = ", ")) +# +# print(grouped) ### RouteE - -work_folder <- normalizePath("~/Workspace/Data/FREIGHT/seattle") -household <- readCsv(pp(work_folder, "/households.csv.gz")) -ggplot(household, aes(x=income/1000)) + geom_histogram() + xlim(0, 100) +# work_folder <- normalizePath("~/Workspace/Data/FREIGHT/seattle") +# household <- readCsv(pp(work_folder, "/households.csv.gz")) +# ggplot(household, aes(x=income/1000)) + geom_histogram() + xlim(0, 100) +## work_folder <- normalizePath("~/Workspace/Data/FREIGHT/seattle") geo <- geojson_sf(pp(work_folder, "/validation/npmrds/Seattle_counties.geojson")) diff --git a/src/main/R/freight/freight-smart2-plotting.R b/src/main/R/freight/freight-smart2-plotting.R index 4fe6eed3867..6a89929d8dc 100644 --- a/src/main/R/freight/freight-smart2-plotting.R +++ b/src/main/R/freight/freight-smart2-plotting.R @@ -356,6 +356,7 @@ dgb2b_runs <- demand_growth_runs_dir, "all_b2b_growth" ) +dgb2b_runs_2018 <- readCsv(pp(demand_growth_runs_dir,"2018_base/0.events.csv.gz")) dgb2b_runs <- format_path_traversals(dgb2b_runs) dgb2b_summary <- dgb2b_runs[, diff --git 
a/src/main/java/beam/matsim/CustomPlansDumpingImpl.java b/src/main/java/beam/matsim/CustomPlansDumpingImpl.java index 0df9233fed6..63c6688696a 100644 --- a/src/main/java/beam/matsim/CustomPlansDumpingImpl.java +++ b/src/main/java/beam/matsim/CustomPlansDumpingImpl.java @@ -8,6 +8,7 @@ import org.matsim.api.core.v01.population.PopulationWriter; import org.matsim.core.config.Config; import org.matsim.core.config.groups.ControlerConfigGroup; +import org.matsim.core.controler.Controler; import org.matsim.core.controler.OutputDirectoryHierarchy; import org.matsim.core.controler.corelisteners.PlansDumping; import org.matsim.core.controler.events.BeforeMobsimEvent; @@ -17,6 +18,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import java.io.UncheckedIOException; +import java.nio.file.StandardCopyOption; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + @Singleton public class CustomPlansDumpingImpl implements PlansDumping, BeforeMobsimListener { static final private Logger log = LoggerFactory.getLogger(CustomPlansDumpingImpl.class); @@ -51,27 +60,92 @@ public void notifyBeforeMobsim(final BeforeMobsimEvent event) { final boolean writingPlansAtAll = writePlansInterval() > 0; final boolean regularWritePlans = writePlansInterval() > 0 && (event.getIteration() > 0 && event.getIteration() % writePlansInterval() == 0); final boolean earlyIteration = event.getIteration() <= writeMoreUntilIteration(); + if (writingPlansAtAll && (regularWritePlans || earlyIteration)) { stopwatch.beginOperation("dump all plans"); log.info("dumping plans..."); - final String inputCRS = config.plans().getInputCRS(); - final String internalCRS = config.global().getCoordinateSystem(); - if (inputCRS == null) { - new PopulationWriter(population, network).write(controlerIO.getIterationFilename(event.getIteration(), "plans.xml.gz")); - } else { - log.info("re-projecting population from " + internalCRS + " back to " + 
inputCRS + " for export"); + String outputFilename = controlerIO.getIterationFilename(event.getIteration(), "plans.xml.gz"); + ensureDirectoryExists(outputFilename); + + try { + writePlans(outputFilename); + log.info("finished plans dump successfully."); + } catch (Exception e) { + log.error("Failed to write plans to {}: {}", outputFilename, e.getMessage()); + throw new RuntimeException("Failed to write plans file", e); + } finally { + stopwatch.endOperation("dump all plans"); + } + } + } + + private void dumpExperiencedPlans() { + if (!config.planCalcScore().isWriteExperiencedPlans()) { + log.debug("Skipping experienced plans dump - disabled in config"); + return; + } + + stopwatch.beginOperation("dump experienced plans"); + log.info("Dumping experienced plans using our BEAM implementation..."); + + try { + String outputFilename = controlerIO.getOutputFilename(Controler.DefaultFiles.experiencedPlans); + String iterationFilename = controlerIO.getIterationFilename( + controlerConfigGroup.getLastIteration(), + Controler.DefaultFiles.experiencedPlans + ); - final CoordinateTransformation transformation = - TransformationFactory.getCoordinateTransformation( - internalCRS, - inputCRS); + ensureDirectoryExists(outputFilename); + Path fromPath = Paths.get(iterationFilename); + Path toPath = Paths.get(outputFilename); - new PopulationWriter(transformation, population, network).write(controlerIO.getIterationFilename(event.getIteration(), "plans.xml.gz")); + if (!Files.exists(fromPath)) { + // Instead of throwing an error, write current plans as experienced plans + log.warn("Experienced plans file not found at {}. 
Writing current plans instead.", iterationFilename); + writePlans(outputFilename); + return; } - log.info("finished plans dump."); - stopwatch.endOperation("dump all plans"); + + Files.copy(fromPath, toPath, StandardCopyOption.REPLACE_EXISTING); + log.info("Successfully copied experienced plans from {} to {}", iterationFilename, outputFilename); + + } catch (IOException e) { + log.error("Failed to copy/write experienced plans file: {}", e.getMessage()); + throw new UncheckedIOException("Failed to handle experienced plans file", e); + } finally { + stopwatch.endOperation("dump experienced plans"); } } -} + private void writePlans(String outputFilename) { + final String inputCRS = config.plans().getInputCRS(); + final String internalCRS = config.global().getCoordinateSystem(); + + if (inputCRS == null) { + new PopulationWriter(population, network).write(outputFilename); + } else { + log.info("re-projecting population from {} back to {} for export", internalCRS, inputCRS); + + final CoordinateTransformation transformation = + TransformationFactory.getCoordinateTransformation( + internalCRS, + inputCRS); + + new PopulationWriter(transformation, population, network).write(outputFilename); + } + } + + private void ensureDirectoryExists(String filename) { + try { + Path directory = Paths.get(filename).getParent(); + if (directory != null && !Files.exists(directory)) { + Files.createDirectories(directory); + log.info("Created directory: {}", directory); + } + } catch (IOException e) { + log.error("Failed to create directory for {}: {}", filename, e.getMessage()); + throw new RuntimeException("Failed to create output directory", e); + } + } +} \ No newline at end of file diff --git a/src/main/java/beam/physsim/jdeqsim/AgentSimToPhysSimPlanConverter.java b/src/main/java/beam/physsim/jdeqsim/AgentSimToPhysSimPlanConverter.java index f064f7aa95d..7eca4834ac3 100755 --- a/src/main/java/beam/physsim/jdeqsim/AgentSimToPhysSimPlanConverter.java +++ 
b/src/main/java/beam/physsim/jdeqsim/AgentSimToPhysSimPlanConverter.java @@ -220,7 +220,7 @@ private void setupActorsAndRunPhysSim(IterationEndsEvent iterationEndsEvent) { try { String outPath = controlerIO - .getIterationFilename(iterationNumber, "countscompare.txt"); + .getIterationFilename(iterationNumber, "countsCompare.txt"); double countsError = CountsObjectiveFunction.evaluateFromRun(outPath); log.info("counts Error: " + countsError); } catch (Exception e) { @@ -533,9 +533,10 @@ private Leg createLeg(PathTraversalEvent pte, Leg connectedLeg, Integer departur List objects = pte.linkIdsJava(); // most of the time the last link of previous leg is the first link of current leg - we are avoiding this boolean sameLinkAtTheEnd; + try { sameLinkAtTheEnd = !linkIds.isEmpty() - && pte.linkIds().head().toString().equals(Iterables.getLast(linkIds).toString()); + && String.valueOf(pte.linkIds()[0]).equals(Iterables.getLast(linkIds).toString()); } catch (java.util.NoSuchElementException e) { log.error("Mismatched path traversal in physsim plans: {}, matched leg: {}", pte, connectedLeg); return null; diff --git a/src/main/java/beam/utils/TravelTimeCalculatorHelper.java b/src/main/java/beam/utils/TravelTimeCalculatorHelper.java index 4a7c96d559e..cd682a22818 100644 --- a/src/main/java/beam/utils/TravelTimeCalculatorHelper.java +++ b/src/main/java/beam/utils/TravelTimeCalculatorHelper.java @@ -1,5 +1,6 @@ package beam.utils; +import beam.router.BeamTravelTime; import beam.utils.logging.ExponentialLoggerWrapperImpl; import org.matsim.api.core.v01.Id; import org.matsim.api.core.v01.network.Link; @@ -12,7 +13,7 @@ import java.util.*; public class TravelTimeCalculatorHelper { - public static class TravelTimePerHour implements TravelTime { + public static class TravelTimePerHour implements BeamTravelTime { private final Logger log = LoggerFactory.getLogger(TravelTimePerHour.class); private final double[][] _linkIdToTravelTimeArray; @@ -23,35 +24,35 @@ public TravelTimePerHour(int 
timeBinSizeInSeconds, final Map l _timeBinSizeInSeconds = timeBinSizeInSeconds; _linkIdToTravelTimeArray = initTravelTime(linkIdToTravelTimeData); } + @Override - public double getLinkTravelTime(Link link, double time, Person person, Vehicle vehicle) { - final int linkId = Integer.parseInt(link.getId().toString()); - if (linkId >= _linkIdToTravelTimeArray.length) { - if(ExponentialLoggerWrapperImpl.isNumberPowerOfTwo(++numWarnings)){ - log.warn("Got linkId {} which is out of `_linkIdToTravelTimeArray` array with length {}", linkId, _linkIdToTravelTimeArray.length); + public double getLinkTravelTime(int linkId, double time) { + if (linkId < 0 || linkId >= _linkIdToTravelTimeArray.length || _linkIdToTravelTimeArray[linkId] == null) { + if (ExponentialLoggerWrapperImpl.isNumberPowerOfTwo(++numWarnings)) { + log.warn("Invalid linkId {} or missing travel time data", linkId); } - return link.getFreespeed(); + return 0d; // Calculate travel time directly } - double[] timePerHour = _linkIdToTravelTimeArray[linkId]; - if (null == timePerHour){ - if(ExponentialLoggerWrapperImpl.isNumberPowerOfTwo(++numWarnings)){ - log.warn("Can't find travel times for link '{}'", linkId); - } - return link.getFreespeed(); - } int idx = getOffset(time); if (idx >= timePerHour.length) { - if(ExponentialLoggerWrapperImpl.isNumberPowerOfTwo(++numWarnings)) { + if (ExponentialLoggerWrapperImpl.isNumberPowerOfTwo(++numWarnings)) { log.warn("Got offset which is out of array for the link {}. Something wrong. 
idx: {}, time: {}, _timeBinSizeInSeconds: '{}'", linkId, idx, time, _timeBinSizeInSeconds); } - return link.getFreespeed(); + return 0d; } return timePerHour[idx]; } - private int getOffset(double time){ - return (int)Math.round(Math.floor(time / _timeBinSizeInSeconds)); + + @Override + public double getLinkTravelTime(Link link, double time, Person person, Vehicle vehicle) { + final int linkId = Integer.parseInt(link.getId().toString()); + return getLinkTravelTime(linkId, time); + } + + private int getOffset(double time) { + return (int) (time / _timeBinSizeInSeconds); } public static double[][] initTravelTime(final Map linkIdToTravelTimeData) { @@ -70,7 +71,13 @@ public static double[][] initTravelTime(final Map linkIdToTrav }); return linkIdToTravelTimeArray; } + + @Override + public double getLinkTravelTime(int linkId, double time, double linkLengthMeters) { + return getLinkTravelTime(linkId, time); + } } + private static final Logger log = LoggerFactory.getLogger(TravelTimeCalculatorHelper.class); public static Map GetLinkIdToTravelTimeArray(Collection links, TravelTime travelTime, int maxHour) { @@ -121,7 +128,7 @@ public static Map AverageTravelTimesMap(Map return result; } - public static TravelTime CreateTravelTimeCalculator(int timeBinSizeInSeconds, Map linkIdToTravelTimeData) { + public static BeamTravelTime CreateTravelTimeCalculator(int timeBinSizeInSeconds, Map linkIdToTravelTimeData) { return new TravelTimePerHour(timeBinSizeInSeconds, linkIdToTravelTimeData); } } \ No newline at end of file diff --git a/src/main/python/__init__.py b/src/main/python/__init__.py new file mode 100644 index 00000000000..bbb3e1fe3e4 --- /dev/null +++ b/src/main/python/__init__.py @@ -0,0 +1,6 @@ +# python/__init__.py +# exposed key functionality: +from .utils import compare_config_files +from .utils import log_filter_script +from .utils import study_area_config +from .utils import files_utils \ No newline at end of file diff --git 
a/src/main/python/atlas/process_vehicle_types.py b/src/main/python/atlas/process_vehicle_types.py new file mode 100644 index 00000000000..4f6bd413445 --- /dev/null +++ b/src/main/python/atlas/process_vehicle_types.py @@ -0,0 +1,54 @@ +import os +import re +import pandas as pd + + +def filter_vehicles_by_year(file_path, max_year=2018): + """ + Reads a vehicle types CSV file using pandas and filters out vehicles with IDs + where the year is greater than max_year. + + Args: + file_path (str): Path to the CSV file + max_year (int): Maximum year to include (default: 2018) + + Returns: + pandas.DataFrame: DataFrame containing filtered vehicle data + """ + # Read the CSV file + df = pd.read_csv(file_path) + + # Extract year from vehicleTypeId and create a filter + def extract_year(vehicle_id): + match = re.match(r'^(\d{4})_', str(vehicle_id)) + if match: + return int(match.group(1)) + return None + + # Apply the year extraction to create a new column + df['year'] = df['vehicleTypeId'].apply(extract_year) + + # Filter out vehicles with year > max_year + filtered_df = df[df['year'].isna() | (df['year'] <= max_year)] + + # Drop the temporary year column + filtered_df = filtered_df.drop(columns=['year']) + + return filtered_df + + +if __name__ == "__main__": + file_path = os.path.expanduser( + "~/Workspace/Simulation/sfbay/vehicle-tech/vehicleTypes--atlas--baseline-projection.csv") + filtered_df = filter_vehicles_by_year(file_path, 2023) + print(f"Found {len(filtered_df)} vehicles after filtering") + + # Print some examples of what was kept + for i, (index, row) in enumerate(filtered_df.head().iterrows()): + print(f"{i + 1}. 
Vehicle ID: {row.get('vehicleTypeId')}") + + # Save the filtered data to a new CSV file + dir_path = os.path.dirname(file_path) + output_path = f"{dir_path}/vehicleTypes--atlas--2023-Baseline.csv" + filtered_df.to_csv(output_path, index=False) + print(f"Filtered data saved to: {output_path}") \ No newline at end of file diff --git a/src/main/python/emissions/_beam_emissions_plotting.py b/src/main/python/emissions/_beam_emissions_plotting.py new file mode 100644 index 00000000000..90e2e3298ed --- /dev/null +++ b/src/main/python/emissions/_beam_emissions_plotting.py @@ -0,0 +1,672 @@ +import gzip +import io +import os +import warnings + +import contextily as cx +import geopandas as gpd +import h3 +import matplotlib.colors as mcolors +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.colors import LogNorm +from shapely.geometry import Polygon +from tqdm import tqdm +from tqdm.auto import tqdm + +# Fuel Color Map +fuel_color_map = { + 'Elec': '#4169E1', # Royal Blue + 'H2fc': '#6495ED', # Cornflower Blue + 'Phe': '#87CEEB', # Sky Blue + 'NG': '#B0E0E6', # Pale Blue + 'BioDsl': '#98FB98', # Pale Green + 'Dsl': '#FFD700', # Gold + 'Gas': '#708090' # Slate Gray +} + +process_color_map = { + 'IDLEX': '#fde725', # Light yellow + 'RUNEX': '#7ad151', # Light green + 'PMBW': '#22a884', # Teal + 'PMTW': '#2a788e', # Blue-green + 'STREX': '#8e0152', # Dark magenta + 'RUNLOSS': '#4b0082', # Indigo + 'HOTSOAK': '#414487', # Purple-blue + 'DIURN': '#440154', # Dark purple +} + + +def remove_outliers_zscore(df, column, threshold=3): + mean = df[column].mean() + std = df[column].std() + z_scores = np.abs((df[column] - mean) / std) + df_filtered = df[z_scores < threshold].copy() + removed_rows = df[~df.index.isin(df_filtered.index)] + summary_df = pd.DataFrame({ + 'column': [column], + 'mean': [mean], + 'std': [std], + 'num_outliers': [len(removed_rows)] + }) + print(summary_df) + print(removed_rows) + return df_filtered + 
+def darken_color(color, factor=0.8): + rgb = mcolors.to_rgb(color) + return tuple(max(0, c * factor) for c in rgb) + + +def plot_hourly_emissions_by_scenario_class_fuel(emissions_skims, pollutant, output_dir, plot_legend, height_size, font_size): + data = emissions_skims[emissions_skims['pollutant'] == pollutant].copy() + grouped_data = data.groupby(['scenario', 'hour', 'class', 'emfacFuel'])['rate'].sum().reset_index() + + plt.figure(figsize=(20, height_size)) + + grouped_data['fuel_class'] = grouped_data['emfacFuel'].astype(str) + ', ' + grouped_data['class'].astype(str) + scenarios = grouped_data['scenario'].unique() + fuel_classes = sorted(grouped_data['fuel_class'].unique()) + all_hours = sorted(grouped_data['hour'].unique()) + + + # Create color map for fuel_classes + fuel_class_colors = {} + for fc in fuel_classes: + fuel, vehicle_class = fc.split(',') + fuel = fuel.strip() + vehicle_class = vehicle_class.strip() + base_color = fuel_color_map[fuel] # Default to black if fuel not found + if any(c in vehicle_class for c in ['7', '8']): + fuel_class_colors[fc] = darken_color(base_color) + else: + fuel_class_colors[fc] = base_color + + x = np.arange(len(all_hours)) + width = 0.35 / len(scenarios) + + scenarios_labeling = [] + for i, scenario in enumerate(scenarios): + scenarios_labeling.append(scenario) + scenario_data = grouped_data[grouped_data['scenario'] == scenario] + bottom = np.zeros(len(all_hours)) + for fuel_class in fuel_classes: + fuel_class_data = scenario_data[scenario_data['fuel_class'] == fuel_class] + # Create an array of rates for all hours, filling with zeros where data is missing + rates = np.zeros(len(all_hours)) + for _, row in fuel_class_data.iterrows(): + hour_index = all_hours.index(row['hour']) + rates[hour_index] = row['rate'] + + # Add edgecolor and linewidth parameters to create a subtle border + plt.bar(x + i * width, rates, width, bottom=bottom, + label=f"{fuel_class}" if i == 0 else "", + color=fuel_class_colors[fuel_class], + 
edgecolor='black', # Add black edge color + linewidth=0.5) # Adjust linewidth as needed + bottom += rates + + plt.title( + f'{pollutant.replace("_", ".")} Emissions: {" vs. ".join(scenarios_labeling)}', + fontsize=font_size+4) + plt.xlabel('Hour', fontsize=font_size) + plt.ylabel('Emissions (Metric Tons)', fontsize=font_size) + plt.xticks(x + width * (len(scenarios) - 1) / 2, all_hours, fontsize=font_size) + plt.yticks(fontsize=24) + if plot_legend: + plt.legend(title='Fuel, Class', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size+4, title_fontsize=font_size+4) + plt.grid(axis='y', linestyle='--', alpha=0.7) + + plt.tight_layout() + plt.savefig(f'{output_dir}/{pollutant.lower()}_emissions_by_scenario_hour_class_fuel.png', dpi=300, bbox_inches='tight') + + +def plot_hourly_activity(tours_types, fuel_beam2emfac_map, output_dir, height_size): + # Preprocess data + tours_types['class'] = tours_types['vehicleCategory'].str.replace('Vocational|Tractor', '', regex=True).str.strip() + tours_types['fuel'] = tours_types['primaryFuelType'].str.lower().map(fuel_beam2emfac_map) + tours_types['fuel'] = np.where((tours_types['fuel'] == "Elec") & tours_types['secondaryFuelType'].notna(), 'Phe', + tours_types['fuel']) + tours_types['fuel_class'] = tours_types['fuel'] + '-' + tours_types['class'] + tours_types['departure_hour'] = (tours_types['departureTimeInSec'] / 3600).astype(int) % 24 + # Group by scenario, hour, and fuel_class, count the number of tours + hourly_activity = tours_types.groupby(['scenario', 'departure_hour', 'fuel_class']).size().unstack( + level=[0, 2], fill_value=0 + ) + + scenarios = tours_types['scenario'].unique() + # If the DataFrame is empty, create a default one with all hours + if hourly_activity.empty: + fuel_classes = tours_types['fuel_class'].unique() + index = pd.Index(range(24), name='departure_hour') + columns = pd.MultiIndex.from_product([scenarios, fuel_classes], names=['scenario', 'fuel_class']) + hourly_activity = pd.DataFrame(0, 
index=index, columns=columns) + else: + # Ensure all hours are present + for hour in range(24): + if hour not in hourly_activity.index: + hourly_activity.loc[hour] = 0 + hourly_activity = hourly_activity.sort_index() + + # Create the plot + plt.figure(figsize=(20, height_size)) + x = np.arange(24) # 24 hours + width = 0.35 # width of the bars + scenarios = hourly_activity.columns.levels[0] + + # Get all unique fuel classes across all scenarios + all_fuel_classes = set() + for scenario in scenarios: + all_fuel_classes.update(hourly_activity[scenario].columns) + + fuel_order = list(fuel_color_map.keys()) + # Sort fuel classes based on the defined order + sorted_fuel_classes = sorted(all_fuel_classes, + key=lambda x: ( + fuel_order.index(x.split('-')[0]) if x.split('-')[0] in fuel_order else len( + fuel_order), x)) + + # Create a color map for all fuel types + #color_map = {fuel: fuel_color_map[fuel] for fuel in fuel_order} + color_map = {} + for fc in sorted_fuel_classes: + fuel, vehicle_class = fc.split('-') + base_color = fuel_color_map[fuel] # Default to black if fuel not found + if any(c in vehicle_class for c in ['7', '8']): + color_map[fc] = darken_color(base_color) + else: + color_map[fc] = base_color + + print("Sorted fuel classes:", sorted_fuel_classes) + print("Color map:", color_map) + + # Plot stacked bars for each scenario + legend_handles = [] + legend_labels = [] + for i, scenario in enumerate(scenarios): + bottom = np.zeros(24) + for fuel_class in sorted_fuel_classes: + color = color_map[fuel_class] + + if fuel_class in hourly_activity[scenario].columns: + values = hourly_activity[scenario][fuel_class] + else: + values = np.zeros(24) + + bar = plt.bar(x + i * width, values, width, bottom=bottom, color=color, edgecolor='black', linewidth=0.5) + bottom += values + + if fuel_class not in legend_labels: + legend_handles.append(bar) + legend_labels.append(fuel_class) + + # plt.title(f'Weekday Tour Activity by Fuel, Class and Scenario: {" vs 
".join(scenarios).replace("_", " ")}', fontsize=24) + plt.xlabel('Hour', fontsize=24) + plt.ylabel('Number of Tours Departing', fontsize=24) + plt.xticks(x + width / 2, range(24), fontsize=24) + plt.yticks(fontsize=12) + + # Create legend with ordered fuel classes + plt.legend(legend_handles, legend_labels, fontsize=28, loc='upper left', bbox_to_anchor=(1, 1)) + + plt.grid(axis='y', linestyle='--', alpha=0.7) + + # Adjust layout and save + plt.tight_layout() + plt.savefig(f'{output_dir}/hourly_activity_by_scenario_fuel_class.png', dpi=300, bbox_inches='tight') + plt.close() + + print(f"Plot saved as {output_dir}/hourly_activity_by_scenario_fuel_class.png") + + +def plot_hourly_vmt(df, output_dir, height_size): + # Preprocess the data + df['fuel_class'] = df['beamFuel'].astype(str) + '-' + df['class'].astype(str) + df['hour'] = df['hour'].astype(int) % 24 + df['mvmt'] = df['vmt'] / 1e6 + + scenarios = df['scenario'].unique() + + hourly_vmt = df.groupby(['scenario', 'hour', 'fuel_class'])['mvmt'].sum().unstack( + level=[0, 2], fill_value=0 + ).copy().reset_index() + + # Ensure all hours are present + for hour in range(24): + if hour not in hourly_vmt.index: + hourly_vmt.loc[hour] = 0 + hourly_vmt = hourly_vmt.sort_index() + + # Create the plot + plt.figure(figsize=(20, height_size)) + x = np.arange(24) # 24 hours + width = 0.35 # width of the bars + + # Get all unique fuel classes across all scenarios + all_fuel_classes = set() + for scenario in scenarios: + all_fuel_classes.update(hourly_vmt[scenario].columns) + + fuel_order = list(fuel_color_map.keys()) + # Sort fuel classes based on the defined order + sorted_fuel_classes = sorted(all_fuel_classes, + key=lambda x: ( + fuel_order.index(x.split('-')[0]) if x.split('-')[0] in fuel_order else len( + fuel_order), x)) + + + # Create color map for fuel_classes + color_map = {} + for fc in sorted_fuel_classes: + fuel, vehicle_class = fc.split('-') + base_color = fuel_color_map[fuel] # Default to black if fuel not found + 
if any(c in vehicle_class for c in ['7', '8']): + color_map[fc] = darken_color(base_color) + else: + color_map[fc] = base_color + + # Plot stacked bars for each scenario + legend_handles = [] + legend_labels = [] + for i, scenario in enumerate(scenarios): + bottom = np.zeros(24) + for fuel_class in sorted_fuel_classes: + if fuel_class in hourly_vmt[scenario].columns: + values = hourly_vmt[scenario][fuel_class] + else: + values = np.zeros(24) + + bar = plt.bar(x + i * width, values, width, bottom=bottom, color=color_map[fuel_class], edgecolor='black', linewidth=0.5) + bottom += values + + if fuel_class not in legend_labels: + legend_handles.append(bar) + legend_labels.append(fuel_class) + + # plt.title(f'Weekday VMT by Fuel, Class and Scenario: {" vs ".join(scenarios).replace("_", " ")}', fontsize=20) + plt.xlabel('Hour', fontsize=24) + plt.ylabel('Million Vehicle Miles Traveled', fontsize=24) + plt.xticks(x + width / 2, range(24), fontsize=24) + plt.yticks(fontsize=24) + + # Create legend with ordered fuel classes + plt.legend(legend_handles, legend_labels, title='Fuel, Class', fontsize=28, loc='upper left', bbox_to_anchor=(1, 1)) + plt.grid(axis='y', linestyle='--', alpha=0.7) + + # Adjust layout and save + plt.tight_layout() + plt.savefig(f'{output_dir}/hourly_vmt_by_scenario_fuel_class.png', dpi=300, bbox_inches='tight') + plt.close() + + print(f"Hourly VMT plot saved as {output_dir}/hourly_vmt_by_scenario_fuel_class.png") + + +def plot_h3_heatmap(df, df_col, scenario, output_dir, is_delta, remove_outliers, in_log_scale): + """Create a heatmap using the H3 grid structure with linear or logarithmic color scale and a base map.""" + subset_df = df[df["scenario"] == scenario] + if remove_outliers: + subset_df = remove_outliers_zscore(subset_df, df_col) + + # Create polygons for all H3 cells in the result + polygons = [Polygon(h3.h3_to_geo_boundary(h3_cell, geo_json=True)) for h3_cell in subset_df['h3_cell']] + + # Create GeoDataFrame + gdf = gpd.GeoDataFrame({ + 
'h3_cell': subset_df['h3_cell'], + 'h3_var': subset_df[df_col], + 'geometry': polygons + }) + gdf = gdf.set_crs("EPSG:4326") + + # Convert to Web Mercator projection for compatibility with contextily + gdf_mercator = gdf.to_crs(epsg=3857) + + # Create figure and axis + fig, ax = plt.subplots(figsize=(15, 10)) + + vmin, vmax = gdf_mercator['h3_var'].min(), gdf_mercator['h3_var'].max() + + if in_log_scale: + if is_delta: + norm = mcolors.SymLogNorm(linthresh=1e-5, vmin=vmin, vmax=vmax) + else: + gdf_mercator = gdf_mercator[gdf_mercator['h3_var'] > 0] + vmin, vmax = gdf_mercator['h3_var'].min(), gdf_mercator['h3_var'].max() + norm = LogNorm(vmin=vmin, vmax=vmax) + label_suffix = "in log scale" + file_suffix = "log" + else: + if is_delta: + norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax) + else: + norm = None + label_suffix = "" + file_suffix = "linear" + + # Choose colormap based on whether it's a delta calculation + if is_delta: + cmap = mcolors.LinearSegmentedColormap.from_list("", ["blue", "lightblue", "white", "pink", "red"]) + else: + cmap = plt.get_cmap('viridis') + + # Plot cells with data + gdf_mercator.plot(column='h3_var', ax=ax, legend=False, cmap=cmap, edgecolor='none', norm=norm, alpha=0.7) + + # Add base map + cx.add_basemap(ax, source=cx.providers.CartoDB.Positron) + + # Add colorbar + sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm) + sm.set_array([]) + if is_delta: + cbar = fig.colorbar(sm, ax=ax, extend='both') + else: + cbar = fig.colorbar(sm, ax=ax, extend='max') + + cbar.ax.tick_params(labelsize=14) + cbar.set_label(f'{df_col.replace("_", ".")} {label_suffix}', rotation=270, labelpad=15, fontsize=18) + + # Set title and adjust plot + # plt.title(f'Emissions Distribution of {df_col.replace("_", ".")}, {scenario} ', fontsize=16) + ax.set_axis_off() + plt.tight_layout() + + # Save figure + outlier_status = "no_outliers" if remove_outliers else "with_outliers" + file_name = f'{output_dir}/{df_col.replace(" ", 
"_").lower()}_{scenario.replace(" ", "_").lower()}_heatmap_{file_suffix}_{outlier_status}_with_basemap.png' + plt.savefig(file_name, dpi=300, bbox_inches='tight') + plt.close() + print(f"Heatmap with base map saved as {file_name}") + + +def create_h3_histogram(df, output_dir, pollutant, scenario, remove_outliers, in_log_scale): + subset_df = df[df["scenario"] == scenario] + if remove_outliers: + subset_df = remove_outliers_zscore(subset_df, pollutant) + # Extract pollutant values + pollutant_values = subset_df[pollutant].values + + # Create the histogram + plt.figure(figsize=(12, 6)) + + if in_log_scale: + # Use log-spaced bins, but with adjustments for potential zero values + bins = np.logspace(np.log10(pollutant_values.min() + 1e-10), + np.log10(pollutant_values.max()), + num=50) + x_label = f'{pollutant.replace("_", ".")} Emissions (log scale)' + title_label = f'Histogram of {pollutant.replace("_", ".")} Emissions by H3 Cell (Log Scale)' + file_name = f'{output_dir}/{pollutant}_{scenario.replace(" ","_").lower()}_emissions_histogram_log.png' + else: + # Use automatic binning based on Sturges' rule + bins = 'sturges' + x_label = f'{pollutant.replace("_", ".")} Emissions' + title_label = f'Histogram of {pollutant.replace("_", ".")} Emissions by H3 Cell' + file_name = f'{output_dir}/{pollutant}_{scenario.replace(" ","_").lower()}_emissions_histogram.png' + + plt.hist(pollutant_values, bins=bins, edgecolor='black') + + # Set x-axis to log scale if specified + if in_log_scale: + plt.xscale('log') + + # Set labels and title + plt.xlabel(x_label, fontsize=12) + plt.ylabel('Frequency', fontsize=12) + plt.title(title_label, fontsize=14) + + # Add grid for better readability + plt.grid(True, linestyle='--', alpha=0.7) + + # Adjust layout and save + plt.tight_layout() + plt.savefig(file_name, dpi=300, bbox_inches='tight') + plt.close() + print(f"Histogram saved as {file_name}/") + + +def fast_df_to_gzip(df, output_file, compression_level=5, chunksize=100000): + """ + Write 
a pandas DataFrame to a compressed CSV.gz file quickly with a progress bar. + + :param df: pandas DataFrame to write + :param output_file: path to the output .csv.gz file + :param compression_level: gzip compression level (1-9, 9 being highest) + :param chunksize: number of rows to write at a time + """ + total_rows = len(df) + + with gzip.open(output_file, 'wt', compresslevel=compression_level) as gz_file: + # Write header + gz_file.write(','.join(df.columns) + '\n') + + # Write data in chunks + with tqdm(total=total_rows, desc="Writing to gzip", unit="rows") as pbar: + for start in range(0, total_rows, chunksize): + end = min(start + chunksize, total_rows) + chunk = df.iloc[start:end] + + csv_buffer = io.StringIO() + chunk.to_csv(csv_buffer, index=False, header=False) + gz_file.write(csv_buffer.getvalue()) + + pbar.update(end - start) + + +def plot_multi_pie_emfac_famos_vmt(data, plot_dir): + def assign_color(fuel_class): + return fuel_color_map[fuel_class.split('-')[0]] + + models = data["model"].unique() + + emfac_data = data[data['model'] == 'emfac'].sort_values('mvmt', ascending=False) + famos_data = data[data['model'] == 'famos'].sort_values('mvmt', ascending=False) + + all_fuel_classes = set(emfac_data['fuel_class']) | set(famos_data['fuel_class']) + for fuel_class in all_fuel_classes: + if fuel_class not in emfac_data['fuel_class'].values: + emfac_data = pd.concat( + [emfac_data, pd.DataFrame({'fuel_class': [fuel_class], 'model': ['EMFAC'], 'mvmt': [0]})], + ignore_index=True) + if fuel_class not in famos_data['fuel_class'].values: + famos_data = pd.concat( + [famos_data, pd.DataFrame({'fuel_class': [fuel_class], 'model': ['FAMOS'], 'mvmt': [0]})], + ignore_index=True) + + emfac_data = emfac_data.sort_values('fuel_class') + famos_data = famos_data.sort_values('fuel_class') + + if emfac_data['mvmt'].sum() == 0 and famos_data['mvmt'].sum() == 0: + print("Error: All VMT values are zero. 
Cannot create pie chart.") + return + + fig, ax = plt.subplots(figsize=(14, 10)) + size = 0.3 + outer_radius = 1 + inner_radius = outer_radius - size + outer_colors = [assign_color(fuel_class) for fuel_class in famos_data['fuel_class']] + inner_colors = [assign_color(fuel_class) for fuel_class in emfac_data['fuel_class']] + + def make_autopct(values): + def my_autopct(pct): + return f'{pct:.1f}%' if pct >= 1 else '' + + return my_autopct + + def add_labels(wedges, fuel_classes, autopct, colors, radius, inner=False): + for wedge, fuel_class, color in zip(wedges, fuel_classes, colors): + ang = (wedge.theta2 + wedge.theta1) / 2 + pct = wedge.theta2 - wedge.theta1 + if pct * 100 / 360 >= 1: # Only show labels for slices >= 1% + label = autopct(pct * 100 / 360) + theta = np.deg2rad(ang) + + if inner: + start_point = ((inner_radius - size) * np.cos(theta), (inner_radius - size) * np.sin(theta)) + end_point = (0.4 * np.cos(theta), 0.4 * np.sin(theta)) + + bbox_props = dict(boxstyle="round,pad=0.3", fc=color, ec="k", lw=0.72, alpha=0.7) + arrowprops = dict(arrowstyle="-", connectionstyle=f"arc3,rad=0", color='k') + + ax.annotate(f'{fuel_class}\n{label}', xy=start_point, xytext=end_point, + horizontalalignment='center', + verticalalignment='center', + bbox=bbox_props, arrowprops=arrowprops, + fontsize=16) + else: + x = (radius + size / 2 + 0.05) * np.cos(theta) + y = (radius + size / 2 + 0.05) * np.sin(theta) + + bbox_props = dict(boxstyle="round,pad=0.3", fc=color, ec="k", lw=0.72, alpha=0.7) + ax.annotate(f'{fuel_class}\n{label}', xy=(x, y), xytext=(x, y), + horizontalalignment='center', + verticalalignment='center', + bbox=bbox_props, + fontsize=16) + + wedges_outer, texts_outer, autotexts_outer = ax.pie(famos_data['mvmt'], radius=outer_radius, colors=outer_colors, + labels=None, autopct='', pctdistance=0.85, + labeldistance=1.1, + wedgeprops=dict(width=size, edgecolor='white')) + + add_labels(wedges_outer, famos_data['fuel_class'], make_autopct(famos_data['mvmt']), 
outer_colors, outer_radius) + + wedges_inner, texts_inner, autotexts_inner = ax.pie(emfac_data['mvmt'], radius=inner_radius, colors=inner_colors, + labels=None, autopct='', pctdistance=0.75, + wedgeprops=dict(width=size, edgecolor='white')) + + add_labels(wedges_inner, emfac_data['fuel_class'], make_autopct(emfac_data['mvmt']), inner_colors, inner_radius, inner=True) + + # ax.set_title('VMT Share by Fuel-Class: FAMOS (outer) vs EMFAC (inner)', fontsize=16) + + # handles = [plt.Rectangle((0, 0), 1, 1, fc="w", ec="k", lw=2, alpha=0.5) for _ in range(2)] + # labels = ['FAMOS (Outer)', 'EMFAC (Inner)'] + # ax.legend(handles, labels, title="Models", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) + + plt.tight_layout() + output_file = os.path.join(plot_dir, f"{'_'.join(models)}_vmt_multi_level_pie_chart.png") + plt.savefig(output_file, bbox_inches='tight', dpi=300) + plt.close() + print(f"Chart has been saved as '{output_file}'") + + +def plot_pollution_variability_by_process_vehicle_types(skims, pollutant, scenario, output_dir, height_size, font_size): + warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn") + # Filter data for specified scenario and pollutant + data = skims[(skims['scenario'] == scenario) & (skims['pollutant'] == pollutant)].copy() + processes = sorted(skims["process"].unique().tolist()) + + # Create fuel_class category + data['fuel_class'] = data['emfacFuel'].astype(str) + ', ' + data['class'].astype(str) + data['rate_micro_gram'] = data['rate'] * 1e12 + + # Sort fuel_class by median emission rate + fuel_class_order = data.groupby('fuel_class')['rate_micro_gram'].median().sort_values(ascending=False).index + + # Set up the plot + fig, ax = plt.subplots(figsize=(20, height_size)) + + # Create color map for fuel_classes + fuel_class_colors = {} + for fc in data['fuel_class'].unique(): + fuel, vehicle_class = fc.split(',') + fuel = fuel.strip() + vehicle_class = vehicle_class.strip() + base_color = fuel_color_map[fuel] # Default 
to black if fuel not found + if any(c in vehicle_class for c in ['7', '8']): + fuel_class_colors[fc] = darken_color(base_color) + else: + fuel_class_colors[fc] = base_color + + # Create the box plot with adjusted parameters + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=FutureWarning) + sns.boxplot(x='process', y='rate_micro_gram', hue='fuel_class', data=data, + order=processes, hue_order=fuel_class_order, + palette=fuel_class_colors, + ax=ax, whis=1.5, fliersize=2, showcaps=True, showfliers=True) + + # Add strip plot for additional data points + sns.stripplot(x='process', y='rate_micro_gram', hue='fuel_class', data=data, + order=processes, hue_order=fuel_class_order, + palette=fuel_class_colors, + ax=ax, size=1, jitter=True, dodge=True, alpha=0.3) + + # Customize the plot + ax.set_title(f'{pollutant.replace("_", ".")} Emissions Variability - {scenario}', fontsize=font_size+4) + ax.set_xlabel('Process', fontsize=font_size) + ax.set_ylabel('Microgram per road link', fontsize=font_size) + ax.tick_params(axis='both', which='major', labelsize=font_size) + + # Rotate x-axis labels if needed + plt.setp(ax.get_xticklabels(), rotation=0, ha='right') + + # Move the legend outside the plot + ax.legend(title='Fuel, Class', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size) + + # Use log scale for y-axis if the range of values is large + min_rate = data['rate_micro_gram'].min() + max_rate = data['rate_micro_gram'].max() + + if min_rate <= 0: + print(f"Warning: Minimum rate is {min_rate}, which is zero or negative. Using log scale by default.") + ax.set_yscale('log') + # Set a small positive value for the bottom of the y-axis + ax.set_ylim(bottom=1e-10) # You might need to adjust this value + scale_label = "log" + elif max_rate / min_rate > 1000: + print(f"Using log scale. Max/min ratio: {max_rate/min_rate}") + ax.set_yscale('log') + scale_label = "log" + else: + print(f"Using linear scale. 
Max/min ratio: {max_rate/min_rate}") + scale_label = "linear" + + plt.tight_layout() + plt.savefig(f'{output_dir}/{pollutant.lower()}_variability_by_process_fuel_class_{scenario.replace(" ", "_").lower()}_{scale_label}_scale.png', dpi=300, bbox_inches='tight') + plt.close() + + +def plot_pollutants_by_process(skims, scenario, plot_dir, height_size, font_size): + # Define process order and color map based on toxicity + process_order = list(process_color_map.keys()) + # Group by pollutant and process, and sum the rates + grouped = skims[skims["scenario"] == scenario].groupby(['pollutant', 'process'])['rate'].sum().unstack() + + # Reorder columns based on process_order + grouped = grouped.reindex(columns=process_order) + + # Normalize the data + normalized = grouped.div(grouped.sum(axis=1), axis=0) + normalized = normalized * 100 + + # Create the stacked bar plot + fig, ax = plt.subplots(figsize=(20, height_size)) + normalized.plot(kind='bar', stacked=True, ax=ax, color=[process_color_map[col] for col in normalized.columns]) + + # Customize the plot + plt.title(f'Normalized Emissions by Process - {scenario}', fontsize=font_size+4) + plt.xlabel('Pollutant', fontsize=font_size) + plt.ylabel('Relative Emissions (%)', fontsize=font_size) + plt.xticks(rotation=0, ha='center', fontsize=font_size) + plt.yticks(fontsize=font_size) + + ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0f}%'.format(y))) + ax.set_ylim(0, 100) + + legend = plt.legend(title='Process', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size) + plt.setp(legend.get_title(), fontsize=font_size) + plt.tight_layout() + + # Save the plot + plt.savefig( + f'{plot_dir}/pollutant_by_process_{scenario.replace(" ", "_").lower()}.png', + dpi=300, + bbox_inches='tight' + ) + + # Show the plot + plt.show() + diff --git a/src/main/python/emissions/_emfac_and_emissions_rates_processing.py b/src/main/python/emissions/_emfac_and_emissions_rates_processing.py new file mode 100644 index 
00000000000..5b48d487a5a --- /dev/null +++ b/src/main/python/emissions/_emfac_and_emissions_rates_processing.py @@ -0,0 +1,935 @@ +import os +import sys +from multiprocessing import Pool + +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.csv as csv +from tqdm import tqdm +from tqdm.auto import tqdm + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +# Now use absolute import +from python.utils.files_utils import check_files + +# Now use absolute import +emissions_processes = [ + "RUNEX", + "IDLEX", + "STREX", + "DIURN", + "HOTSOAK", + "RUNLOSS", + "PMTW", + "PMBW", + "PRDUST" +] + +pollutant_columns = { + 'CH4': 'rate_ch4_gram_float', + 'CO': 'rate_co_gram_float', + 'CO2': 'rate_co2_gram_float', + 'HC': 'rate_hc_gram_float', + 'NH3': 'rate_nh3_gram_float', + 'NOx': 'rate_nox_gram_float', + 'PM': 'rate_pm_gram_float', + 'PM10': 'rate_pm10_gram_float', + 'PM2_5': 'rate_pm2_5_gram_float', + 'ROG': 'rate_rog_gram_float', + 'SOx': 'rate_sox_gram_float', + 'TOG': 'rate_tog_gram_float', + 'BC_V1': 'rate_bc_gram_float', + 'BC_V2': 'rate_bcm_gram_float', + 'BC_V3': 'rate_bch_gram_float' +} + +def calculate_road_dust_emissions(silt_loading, rainy_days): + """ + Calculate road dust emissions based on EPA AP-42 methodology. 
+ + Parameters: + silt_loading (float): Roadway-specific silt loading in grams/square meter + rainy_days (int): Number of wet days in the year + + Returns: + tuple: PM2.5, PM10, and total PM emission factors in grams/vehicle-mile + """ + # Constants + k = 0.0022 # particle size multiplier for PM10 in lb/VMT + W = 2.4 # average weight of vehicles in tons + N = 365 # number of days in annual averaging period + + # Fractions of pollutants among road dust + pm_25_frac = 0.0686 + pm_10_frac = 0.4572 + pm_frac = 0.5428 + + # Calculate PM10 emission factor in lb/VMT + E_10 = k * (silt_loading ** 0.91) * (W ** 1.02) * (1 - rainy_days / N / 4) + + # Calculate total PM emission factor + E_total = E_10 / pm_10_frac + + # Calculate PM2.5 emission factor + E_25 = E_total * pm_25_frac + + # Convert from lb/VMT to g/VMT (1 lb = 453.592 g) + E_25_g = E_25 * 453.592 + E_10_g = E_10 * 453.592 + E_total_g = E_total * 453.592 + + return E_25_g, E_10_g, E_total_g + + +def generate_road_dust_rates(rainy_days_file, silt_loading_file, air_basin_region): + """ + Process rainy days and silt loading data to create road dust emission rates. 
+ + Parameters: + rainy_days_file (str): Path to the rainy days CSV file + silt_loading_file (str): Path to the silt loading CSV file + air_basin_region (list): List of air basins to filter by + + Returns: + pd.DataFrame: DataFrame with road dust emission rates + """ + # Map BEAM/OSM road types to CARB silt loading road categories + silt_beam2carb_map = { + 'motorway': 'Freeway', + 'motorway_link': 'Freeway', + 'trunk': 'Freeway', + 'trunk_link': 'Major', + 'primary': 'Major', + 'primary_link': 'Major', + 'secondary': 'Collector', + 'secondary_link': 'Collector', + 'tertiary': 'Collector', + 'tertiary_link': 'Collector', + 'unclassified': 'Collector', + 'residential': 'Local Urban' + } + + # Load silt loading data + silt_loading_df = pd.read_csv(silt_loading_file) + + # Ensure consistent county names across datasets + silt_loading_df['County'] = silt_loading_df['County'].str.strip().str.lower() + silt_loading_df['Air Basin'] = silt_loading_df['Air Basin'].str.strip() + silt_filtered_df = silt_loading_df[silt_loading_df['Air Basin'].isin(air_basin_region)] + if silt_filtered_df.empty: + raise ValueError(f"No data found in silt loading for the specified air basins: {air_basin_region}") + road_categories = ['Freeway', 'Major', 'Collector', 'Local Urban', 'Local Rural'] + county_averages = silt_filtered_df.groupby('County')[road_categories].mean().reset_index() + county_averages = county_averages.sort_values('County') + + # Load rainy days data + rainy_days_df = pd.read_csv(rainy_days_file) + rainy_days_df['County'] = rainy_days_df['County'].str.strip().str.lower() + rainy_days_df['Air Basin'] = rainy_days_df['Air Basin'].str.strip() + rainy_filtered_df = rainy_days_df[rainy_days_df['Air Basin'].isin(air_basin_region)] + if rainy_filtered_df.empty: + raise ValueError(f"No data found in rainy days for the specified air basins: {air_basin_region}") + rainfall_averages = rainy_filtered_df.groupby('County')['Annual Rainfall Days'].mean().reset_index() + rainfall_averages = 
rainfall_averages.sort_values('County') + + # Merge county silt loading with rainy days data + merged_data = pd.merge(county_averages, rainfall_averages, on='County', how='inner') + + # Initialize lists to store emissions data for all BEAM/OSM road types + all_rows = [] + + # Calculate road dust emissions for each county and road type + print("Calculating road dust emissions...") + total_iterations = len(merged_data) * len(silt_beam2carb_map) + with tqdm(total=total_iterations, desc="Processing counties and road types") as pbar: + for _, row in merged_data.iterrows(): + county = row['County'] + rainy_days = row['Annual Rainfall Days'] + + # Create a dictionary to map CARB road categories to their silt loading values for this county + carb_road_to_silt = {road_type: row[road_type] for road_type in road_categories} + + # Process each BEAM/OSM road type + for beam_road_type, carb_road_type in silt_beam2carb_map.items(): + silt_loading = carb_road_to_silt[carb_road_type] + + # Calculate emission factors + pm25, pm10, pm_total = calculate_road_dust_emissions(silt_loading, rainy_days) + + # Create a dictionary for this row + row_dict = { + 'county': county, + 'process': 'PRDUST', + 'rate_pm2_5_gram_float': pm25, + 'rate_pm10_gram_float': pm10, + 'rate_pm_gram_float': pm_total, + 'road_category': beam_road_type, + 'carb_road_category': carb_road_type, + 'silt_loading': silt_loading, + 'rainy_days': rainy_days + } + + all_rows.append(row_dict) + pbar.update(1) + + # Create emissions DataFrame + _emissions_df = pd.DataFrame(all_rows) + + # Reorder columns to match required format + column_order = [ + 'county', + 'road_category', + 'process', + 'rate_pm_gram_float', + 'rate_pm10_gram_float', + 'rate_pm2_5_gram_float' + ] + + return _emissions_df[column_order] + +def numerical_column_to_binned_and_pivot(df_raw, numerical_colname, binned_colname, edge_values): + pivot_df = pivot_rates_for_beam(df_raw).sort_values(by='speed_time', ascending=True) + df_raw_last_row = 
pivot_df.iloc[-1].copy() + df_raw_last_row['speed_time'] = edge_values[1] + pivot_df = pd.concat([pivot_df, pd.DataFrame([df_raw_last_row])], ignore_index=True) + col_sorted = sorted(pivot_df[numerical_colname].unique()) + col_bins = [edge_values[0]] + col_sorted + col_labels = [f"[{col_bins[i]}, {col_bins[i + 1]})" for i in range(len(col_bins) - 1)] + pivot_df[binned_colname] = pd.cut(pivot_df[numerical_colname], bins=col_bins, labels=col_labels, right=True) + return pivot_df + +def pivot_rates_for_beam(df_raw): + unique_speed_time = df_raw.speed_time.unique() + has_non_empty_speed_time = any(len(str(x)) > 0 for x in unique_speed_time) and not pd.isnull( + unique_speed_time).all() + index_ = ["emfacId", 'county', 'process'] + if has_non_empty_speed_time: + index_.append("speed_time") + pivot_df = df_raw.pivot_table(index=index_, columns='pollutant', values='emission_rate', aggfunc='first', + fill_value=0).reset_index() + pivot_df = pivot_df.rename(columns=pollutant_columns) + # Add missing columns with default values + for col in pollutant_columns.values(): + if col not in pivot_df.columns: + pivot_df[col] = 0.0 + pivot_df.insert(0, 'speed_mph_float_bins', "") + pivot_df.insert(1, 'time_minutes_float_bins', "") + return pivot_df + + +def process_rates_group(df, row): + mask = ((df["county"] == row["county"]) & (df["emfacId"] == row["emfacId"])) + df_subset = df[mask] + df_output_list = [] + + # Add progress bar for processing each emissions process + print(f"Processing emissions for county: {row['county']}, emfacId: {row['emfacId']}") + for process in tqdm(emissions_processes, desc="Processing emission processes"): + df_temp = df_subset[df_subset['process'] == process] + if not df_temp.empty: + if process in ['RUNEX', 'PMBW']: + df_temp = numerical_column_to_binned_and_pivot(df_temp, 'speed_time', 'speed_mph_float_bins', + [0.0, 200.0]) + elif process == 'STREX': + df_temp = numerical_column_to_binned_and_pivot(df_temp, 'speed_time', 'time_minutes_float_bins', + 
[0.0, 3600.0]) + else: + df_temp = pivot_rates_for_beam(df_temp) + df_output_list.append(df_temp) + + return pd.concat(df_output_list, ignore_index=True) if df_output_list else pd.DataFrame() + +# Define this function at module level (outside any other function) +def process_chunk(chunk_data): + chunk, emissions_df = chunk_data + results = [] + # Process each row in the chunk + for _, row in tqdm(chunk.iterrows(), total=len(chunk), + desc=f"Processing chunk with {len(chunk)} rows", + leave=False): + result = process_rates_group(emissions_df, row) + results.append(result) + + return pd.concat(results, ignore_index=True) if results else pd.DataFrame() + + +def process_emfac_rates( + emfac_rates_by_model_year_file, + format_func, + season_month, + calendar_year, + air_basin_area, + temperature, + relative_humidity, + include_nan=True): + """ + Process EMFAC emissions rates with improved air basin filtering. + + Args: + emfac_rates_by_model_year_file: Path to EMFAC rates input file + format_func: Function to format the data + season_month: Season or month to filter by + calendar_year: Calendar year to filter by + air_basin_area: Air basin area(s) to filter by (can be list or single string) + temperature: Temperature to filter by + relative_humidity: Relative humidity to filter by + include_nan: Whether to include NaN values in filtering + + Returns: + DataFrame with processed EMFAC rates + """ + # Process the emissions data + print(f"Reading CSV file: {emfac_rates_by_model_year_file}") + table = csv.read_csv(emfac_rates_by_model_year_file, read_options=pa.csv.ReadOptions(use_threads=True)) + df = table.to_pandas() + print(f"CSV file loaded. Shape: {df.shape}") + + # Apply filters based on config + print("Applying filters...") + if 'season_month' in df.columns: + print(f"Filtering by season_month: {season_month}") + df = df[(df['season_month'] == season_month) | (include_nan & df['season_month'].isna())] + print(f"After season_month filter. 
Shape: {df.shape}") + + if 'calendar_year' in df.columns: + print(f"Filtering by calendar_year: {calendar_year}") + df = df[(df['calendar_year'] == calendar_year) | (include_nan & df['calendar_year'].isna())] + print(f"After calendar_year filter. Shape: {df.shape}") + + # Improved air basin area filtering to handle partial matches + if 'sub_area' in df.columns: + print(f"Filtering by air_basin_area: {air_basin_area}") + # Create a filter condition for partial matches + sub_area_filter = include_nan & df['sub_area'].isna() + + for area in air_basin_area: + # Look for exact match or area in parentheses (e.g., "Santa Clara (SF)" for "SF") + sub_area_filter = sub_area_filter | df['sub_area'].str.contains(f'\\({area}\\)', regex=True) | ( + df['sub_area'] == area) + + # Apply the filter + df = df[sub_area_filter] + print(f"After sub_area filter. Shape: {df.shape}") + + if 'temperature' in df.columns: + print(f"Filtering by temperature: {temperature}") + df = df[(df['temperature'] == temperature) | (include_nan & df['temperature'].isna())] + print(f"After temperature filter. Shape: {df.shape}") + + if 'relative_humidity' in df.columns: + print(f"Filtering by relative_humidity: {relative_humidity}") + df = df[(df['relative_humidity'] == relative_humidity) | (include_nan & df['relative_humidity'].isna())] + print(f"After relative_humidity filter. 
Shape: {df.shape}") + + # Group by MY_group and calculate statistics + print("Filling missing values and formatting data...") + df = df.fillna('') + df = df.reset_index(drop=True) + + print("Formatting data with provided format function...") + df_formatted = format_func(df) + + print("Grouping and calculating mean emission rates...") + group_col = ['area', 'county', 'emfacId', 'model_year_group', 'vehicle_class', 'fuel', 'process', 'speed_time', 'pollutant'] + emissions_rates = df_formatted.groupby(group_col)['emission_rate'].mean().reset_index() + print("Getting unique county/emfacId combinations...") + df_unique = emissions_rates[["county", "emfacId"]].drop_duplicates().reset_index(drop=True) + print(f"Found {len(df_unique)} unique county/emfacId combinations") + + # Parallel processing + # Use fewer, larger chunks and match to number of CPU cores + num_cores = min(os.cpu_count() or 4, 8) # Cap at 8 to prevent excessive overhead + chunks = np.array_split(df_unique, num_cores) + print(f"Starting parallel processing with {num_cores} cores...") + + # Use parallel processing with fewer, larger chunks + with Pool(num_cores) as pool: + with tqdm(total=num_cores, desc="Processing chunks") as pbar: + def update_progress(*args): + pbar.update() + return args[0] + + # Use imap to process chunks sequentially with progress updates + df_output_list = [] + for result in pool.imap(process_chunk, [(chunk, emissions_rates) for chunk in chunks]): + df_output_list.append(result) + pbar.update(1) + + # Formatting for merge + print("Combining results and finalizing data...") + df_output = pd.concat(df_output_list, ignore_index=True).drop(["speed_time"], axis=1) + + # Filter out rows where all emission columns are zero + emission_columns = [col for col in df_output.columns if col.startswith('rate_') and col.endswith('_gram_float')] + + # Count rows before filtering + total_rows_before = len(df_output) + filtered_out = df_output[(df_output[emission_columns] == 0).all(axis=1)] + 
df_output = df_output[~(df_output[emission_columns] == 0).all(axis=1)] + # Count rows after filtering + total_rows_after = len(df_output) + + print(f"Filtered out {total_rows_before - total_rows_after} rows where all emission columns are zero") + print(f"Final dataset has {total_rows_after} rows") + + # Reorder columns to ensure 'county' is at the front + columns = df_output.columns.tolist() + columns = ['county'] + [col for col in columns if col != 'county'] + emfac_rates = df_output[columns] + + return emfac_rates + +def process_emfac_emissions(study_area, scenario_name, work_dir, config, format_func): + # Get file paths + emfac_config = config["rates"]["emfac"] + filters_config = config["rates"]["filters"] + emfac_rates_by_model_year_file = os.path.join(work_dir, emfac_config['emfac_rates_by_model_year_file']) + emfac_emission_rate_output_file = os.path.join( + work_dir, + f"{config["rates"]["output_dir"]}/{study_area}_emfac_rates_{scenario_name}.csv" + ) + + if check_files([emfac_emission_rate_output_file], config["override_rates"]): + emfac_rates = pd.read_csv(emfac_emission_rate_output_file) + else: + emfac_rates = process_emfac_rates( + emfac_rates_by_model_year_file, + format_func, + filters_config['season_month'], + filters_config['calendar_year'], + filters_config['sub_area'], + filters_config['temperature'], + filters_config['relative_humidity'], + include_nan=filters_config["include_nan"]) + + print(f"Writing EMFAC emission rate to: {emfac_emission_rate_output_file}") + emfac_rates.to_csv(emfac_emission_rate_output_file, index=False) + + return emfac_rates + + +def process_black_carbon(study_area, scenario_name, work_dir, config, format_func): + # Get file paths + black_carbon_config = config["rates"]["black_carbon"] + filters_config = config["rates"]["filters"] + bc_rates_by_model_year_file = os.path.join(work_dir, black_carbon_config['black_carbon_rates_file']) + bc_emission_rate_output_file = os.path.join( + work_dir, + 
f"{config["rates"]["output_dir"]}/{study_area}_black_carbon_rates_{scenario_name}.csv" + ) + + if check_files([bc_emission_rate_output_file], config["override_rates"]): + bc_rates = pd.read_csv(bc_emission_rate_output_file) + else: + bc_rates = process_emfac_rates( + bc_rates_by_model_year_file, + format_func, + filters_config['season_month'], + filters_config['calendar_year'], + filters_config['sub_area'], + filters_config['temperature'], + filters_config['relative_humidity'], + include_nan=filters_config["include_nan"] + ) + + print(f"Writing Black Carbon emission rate to: {bc_emission_rate_output_file}") + bc_rates.to_csv(bc_emission_rate_output_file, index=False) + + return bc_rates + + +def process_road_dust(study_area, scenario_name, work_dir, config, emfac_ids): + """ + Process road dust emission rates for all EMFAC IDs. + + Args: + study_area (str): Study area name + scenario_name (str): Scenario name + work_dir (str): Working directory path + config (dict): Configuration dictionary + emfac_ids (set): Set of EMFAC IDs to process + + Returns: + pd.DataFrame: Road dust emission rates for all EMFAC IDs + """ + road_dust_config = config["rates"]["road_dust"] + filters_config = config["rates"]["filters"] + + # Get road dust file paths + _rainy_days_file = os.path.join(work_dir, road_dust_config['rainy_days_file']) + _silt_loading_file = os.path.join(work_dir, road_dust_config['silt_loading_file']) + road_dust_output_file = os.path.join( + work_dir, + f"{config["rates"]["output_dir"]}/{study_area}_paved_road_dust_rates_{scenario_name}.csv" + ) + + # Check if the output file already exists + if check_files([road_dust_output_file], config["override_rates"]): + print(f"Loading existing road dust rates from: {road_dust_output_file}") + road_dust_rates = pd.read_csv(road_dust_output_file) + print(f"Loaded road dust rates with {len(road_dust_rates)} rows") + else: + try: + # Ensure output directory exists + os.makedirs(os.path.dirname(road_dust_output_file), 
exist_ok=True) + + print(f"Processing road dust for air basins: {filters_config['sub_area']}") + print(f"Using rainy days file: {_rainy_days_file}") + print(f"Using silt loading file: {_silt_loading_file}") + + # Process road dust emission rates + road_dust_rates = generate_road_dust_rates(_rainy_days_file, _silt_loading_file, filters_config['sub_area']) + + print(f"Generated base road dust rates with {len(road_dust_rates)} rows") + print(f"Duplicating rates for {len(emfac_ids)} EMFAC IDs") + + # Create a list to hold all DataFrames + dfs = [] + + # Use tqdm to show progress of the duplication process + for emfac_id in tqdm(emfac_ids, desc="Creating rates for each EMFAC ID"): + temp_df = road_dust_rates.copy() + temp_df["emfacId"] = emfac_id + dfs.append(temp_df) + + # Concatenate all the DataFrames + print("Concatenating all rates...") + road_dust_rates = pd.concat(dfs, ignore_index=True) + print(f"Final road dust rates shape: {road_dust_rates.shape}") + + # Ensure output directory exists + os.makedirs(os.path.dirname(road_dust_output_file), exist_ok=True) + + print(f"Writing road dust emission rates to: {road_dust_output_file}") + road_dust_rates.to_csv(road_dust_output_file, index=False) + print("Road dust rates saved successfully") + + except Exception as e: + print(f"Error processing road dust for scenario '{scenario_name}': {str(e)}") + print("Returning empty DataFrame due to error") + return pd.DataFrame() + + return road_dust_rates + + +def process_emissions_rates(_study_area, _scenario_name, _work_dir, config, format_func): + """ + Process emissions rates for one or more scenarios based on the provided configuration. 
+ + Args: + _study_area (str): Area for which emissions rates need to be processed + _scenario_name (str): Name of the scenario + _work_dir (str): Working directory path + config (dict): Configuration dictionary containing emission scenarios + format_func: + + Returns: + pd.DataFrame: Combined emissions rates for the scenario + """ + # File paths for outputs + rates_config = config["rates"] + combined_rate_file = os.path.join(_work_dir, f"{rates_config["output_dir"]}/{_study_area}_emissions_rates_{_scenario_name}.csv") + + # Ensure output directory exists + os.makedirs(os.path.dirname(combined_rate_file), exist_ok=True) + + if check_files([combined_rate_file], config["override_rates"]): + print(f"Loading existing combined rates from: {combined_rate_file}") + _combined_rates = pd.read_csv(combined_rate_file, dtype=str) + print(f"Loaded combined rates with {len(_combined_rates)} rows") + else: + print(f"Starting emissions rate processing for {_study_area}, scenario: {_scenario_name}") + dfs = [] + emfac_ids = set() + + # Track processing steps + steps = [] + if 'emfac' in rates_config: + steps.append('EMFAC emissions') + if 'black_carbon' in rates_config: + steps.append('Black Carbon emissions') + if 'road_dust' in rates_config: + steps.append('Road Dust emissions') + + print(f"Will process: {', '.join(steps)}") + + # Use tqdm to show progress of processing steps + with tqdm(total=len(steps), desc="Processing emission types") as pbar: + # Process EMFAC emissions if configured + if 'emfac' in rates_config: + print(f"\nProcessing EMFAC emissions for scenario '{_scenario_name}'") + emfac_rates = process_emfac_emissions(_study_area, _scenario_name, _work_dir, config, format_func) + if not emfac_rates.empty: + dfs.append(emfac_rates) + emfac_ids.update(emfac_rates["emfacId"].unique()) + print(f"Added {len(emfac_rates)} EMFAC emission rows") + else: + print("No EMFAC emissions were processed") + pbar.update(1) + else: + print(f"Skipping EMFAC processing for scenario 
'{_scenario_name}' as no config is provided.") + + # Process black carbon emissions if configured + if 'black_carbon' in rates_config: + print(f"\nProcessing Black Carbon emissions for scenario '{_scenario_name}'") + black_carbon_rates = process_black_carbon(_study_area, _scenario_name, _work_dir, config, format_func) + if not black_carbon_rates.empty: + dfs.append(black_carbon_rates) + emfac_ids.update(black_carbon_rates["emfacId"].unique()) + print(f"Added {len(black_carbon_rates)} Black Carbon emission rows") + else: + print("No Black Carbon emissions were processed") + pbar.update(1) + else: + print(f"Skipping Black Carbon processing for scenario '{_scenario_name}' as no config is provided.") + + # Process road dust emissions if configured + if 'road_dust' in rates_config: + print(f"\nProcessing Road Dust emissions for scenario '{_scenario_name}'") + road_dust_rates = process_road_dust(_study_area, _scenario_name, _work_dir, config, emfac_ids) + if not road_dust_rates.empty: + dfs.append(road_dust_rates) + print(f"Added {len(road_dust_rates)} Road Dust emission rows") + else: + print("No Road Dust emissions were processed") + pbar.update(1) + else: + print(f"Skipping Paved Road Dust processing for scenario '{_scenario_name}' as no config is provided.") + + if not dfs: + print(f"Warning: No emission rates available for scenario '{_scenario_name}'") + _combined_rates = pd.DataFrame() + else: + print("\nCombining all emission rates...") + + # Get counts for each type of emission + emission_counts = { + f"Source {i + 1}": len(df) for i, df in enumerate(dfs) if df is not None + } + print(f"Emission source row counts: {emission_counts}") + + # Get all unique columns from all dataframes + all_columns = set() + for df in dfs: + if df is not None: # Check that df is not None + all_columns.update(df.columns) + print(f"Total unique columns across all sources: {len(all_columns)}") + + # Filter out None values + valid_dfs = [df for df in dfs if df is not None] + 
print(f"Processing {len(valid_dfs)} valid dataframes") + + # Add missing columns to each dataframe + print("Adding missing columns to each dataframe...") + for i in tqdm(range(len(valid_dfs)), desc="Standardizing dataframes"): + missing_cols = all_columns - set(valid_dfs[i].columns) + for col in missing_cols: + valid_dfs[i][col] = None + print(f"Added {len(missing_cols)} missing columns to dataframe {i + 1}") + + print("Concatenating all dataframes...") + _combined_rates = pd.concat(valid_dfs, ignore_index=True) + _combined_rates["scenario"] = _scenario_name + + # Report on final combined size + print(f"Combined rates shape: {_combined_rates.shape}") + + # Specify the columns you want to appear first + first_cols = [ + "scenario", "emfacId", "county", "speed_mph_float_bins", "time_minutes_float_bins", "road_category", + "process" + ] + remaining_cols = [col for col in _combined_rates.columns if col not in first_cols] + _combined_rates = _combined_rates[first_cols + remaining_cols] + + print(f"Writing combined emission rates to: {combined_rate_file}") + _combined_rates.to_csv(combined_rate_file, index=False) + print("Combined rates saved successfully") + + return _combined_rates + + +def process_emfac_population(_study_area, _scenario_name, _work_dir, config, format_func): + """ + Process EMFAC population data by model year, adding proportional calculations. 
+ + Args: + _study_area: Study area name + _scenario_name: Scenario name + _work_dir: Working directory path + config: Configuration dictionary containing filtering and file path information + format_func: Function to format the data + + Returns: + pandas.DataFrame: Processed and grouped population data with proportion calculations + """ + _emfac_population_output_file = os.path.join( + _work_dir, + f"{config["rates"]["output_dir"]}/{_study_area}_emfac_population_{_scenario_name}.csv" + ) + + # Ensure output directory exists + os.makedirs(os.path.dirname(_emfac_population_output_file), exist_ok=True) + + if check_files([_emfac_population_output_file], config["override_rates"]): + print(f"Loading existing EMFAC population data from: {_emfac_population_output_file}") + emfac_population = pd.read_csv(_emfac_population_output_file) + print(f"Loaded population data with {len(emfac_population)} rows") + else: + print(f"Processing EMFAC population data for {_study_area}, scenario: {_scenario_name}") + + include_nan = config["rates"]["filters"]["include_nan"] + calendar_year = config["rates"]["filters"]["calendar_year"] + air_basin_area = config["rates"]["filters"]["sub_area"] + _emfac_population_by_model_year_file = os.path.join( + _work_dir, + config["rates"]["emfac"]["emfac_pop_by_model_year_file"] + ) + + print(f"Reading population data from: {_emfac_population_by_model_year_file}") + + table = csv.read_csv(_emfac_population_by_model_year_file, read_options=pa.csv.ReadOptions(use_threads=True)) + df = table.to_pandas() + print(f"Loaded population data with shape: {df.shape}") + + # Create a progress bar for the filtering steps + filtering_steps = [ + "Calendar year", + "Air basin area", + "Convert population", + "Clean data", + "Format data", + "Group data", + "Calculate proportions" + ] + + with tqdm(total=len(filtering_steps), desc="Processing population data") as pbar: + # Filter by calendar year + if 'calendar_year' in df.columns: + print(f"Filtering by calendar 
year: {calendar_year}") + before_count = len(df) + df = df[(df['calendar_year'] == calendar_year) | (include_nan & df['calendar_year'].isna())] + print(f"After filtering: {len(df)} rows (removed {before_count - len(df)} rows)") + pbar.update(1) + + # Filter by sub area + if 'sub_area' in df.columns: + print(f"Filtering by air basin area: {air_basin_area}") + before_count = len(df) + # Create a filter condition for partial matches + sub_area_filter = include_nan & df['sub_area'].isna() + + for _area in air_basin_area: + # Look for exact match or area in parentheses (e.g., "Santa Clara (SF)" for "SF") + sub_area_filter = sub_area_filter | df['sub_area'].str.contains(f'\\({_area}\\)', regex=True) | ( + df['sub_area'] == _area) + + # Apply the filter + df = df[sub_area_filter] + print(f"After filtering: {len(df)} rows (removed {before_count - len(df)} rows)") + pbar.update(1) + + # Convert population column to float for calculations + if 'population' in df.columns: + print("Converting population to numeric values") + df['population'] = pd.to_numeric(df['population'], errors='coerce') + # Check for NaN values after conversion + nan_count = df['population'].isna().sum() + if nan_count > 0: + print(f"Warning: {nan_count} rows have non-numeric population values") + pbar.update(1) + + # Clean data + print("Cleaning data (filling NaN values and resetting index)") + df = df.fillna('') + df = df.reset_index(drop=True) + pbar.update(1) + + # Format the data + print("Formatting data with provided format function") + df_formatted = format_func(df) + print(f"Formatted data shape: {df_formatted.shape}") + pbar.update(1) + + # Group by relevant columns and sum population + print("Grouping data and calculating total populations") + emfac_population = df_formatted.groupby('emfacId').agg({ + 'vehicle_class': 'first', + 'fuel': 'first', + 'model_year_group': 'first', + 'mappedFuel': 'first', + 'mappedClass': 'first', + 'population': 'sum' + }).reset_index() + print(f"After grouping: 
{len(emfac_population)} unique vehicle class/fuel/model year combinations") + pbar.update(1) + + # Calculate total population across all groups + total_population = emfac_population['population'].sum() + print(f"Total vehicle population: {total_population:,.0f}") + + # Calculate proportion of each group relative to total + print("Calculating population proportions") + emfac_population['population_proportion'] = emfac_population['population'] / total_population + + # Print the top 5 vehicle categories by population + print("Top 5 vehicle categories by population:") + top_5 = emfac_population.sort_values('population', ascending=False).head(5) + for _, row in top_5.iterrows(): + percent = row['population_proportion'] * 100 + print(f" {row['vehicle_class']}, {row['fuel']}, {row['model_year_group']}: " + f"{row['population']:,.0f} vehicles ({percent:.2f}%)") + pbar.update(1) + + # Save the processed data + print(f"Writing population data to: {_emfac_population_output_file}") + emfac_population.to_csv(_emfac_population_output_file, index=False) + print("Population data saved successfully") + + return emfac_population + + +def process_emfac_vmt(_study_area, _scenario_name, _work_dir, config, format_func): + """ + Process EMFAC VMT data by model year, adding proportional calculations. 
+ + Args: + _study_area: Study area name + _scenario_name: Scenario name + _work_dir: Working directory path + config: Configuration dictionary containing filtering and file path information + format_func: Function to format the data + + Returns: + pandas.DataFrame: Processed and grouped VMT data with proportion calculations + """ + _emfac_vmt_output_file = os.path.join( + _work_dir, + f"{config["rates"]["output_dir"]}/{_study_area}_emfac_vmt_{_scenario_name}.csv" + ) + + # Ensure output directory exists + os.makedirs(os.path.dirname(_emfac_vmt_output_file), exist_ok=True) + + if check_files([_emfac_vmt_output_file], config["override_rates"]): + print(f"Loading existing EMFAC VMT data from: {_emfac_vmt_output_file}") + emfac_vmt = pd.read_csv(_emfac_vmt_output_file) + print(f"Loaded VMT data with {len(emfac_vmt)} rows") + else: + print(f"Processing EMFAC VMT data for {_study_area}, scenario: {_scenario_name}") + + include_nan = config["rates"]["filters"]["include_nan"] + calendar_year = config["rates"]["filters"]["calendar_year"] + air_basin_area = config["rates"]["filters"]["sub_area"] + _emfac_vmt_by_model_year_file = os.path.join( + _work_dir, + config["rates"]["emfac"]["emfac_vmt_by_model_year_file"] + ) + + print(f"Reading VMT data from: {_emfac_vmt_by_model_year_file}") + + table = csv.read_csv(_emfac_vmt_by_model_year_file, read_options=pa.csv.ReadOptions(use_threads=True)) + df = table.to_pandas() + print(f"Loaded VMT data with shape: {df.shape}") + + # Create a progress bar for the filtering steps + filtering_steps = [ + "Calendar year", + "Air basin area", + "Convert numeric columns", + "Format data", + "Clean data", + "Group data", + "Calculate proportions" + ] + + with tqdm(total=len(filtering_steps), desc="Processing VMT data") as pbar: + # Filter by calendar year + if 'calendar_year' in df.columns: + print(f"Filtering by calendar year: {calendar_year}") + before_count = len(df) + df = df[(df['calendar_year'] == calendar_year) | (include_nan & 
df['calendar_year'].isna())] + print(f"After filtering: {len(df)} rows (removed {before_count - len(df)} rows)") + pbar.update(1) + + # Filter by sub area + if 'sub_area' in df.columns: + print(f"Filtering by air basin area: {air_basin_area}") + before_count = len(df) + # Create a filter condition for partial matches + sub_area_filter = include_nan & df['sub_area'].isna() + + for _area in air_basin_area: + # Look for exact match or area in parentheses (e.g., "Santa Clara (SF)" for "SF") + sub_area_filter = sub_area_filter | df['sub_area'].str.contains(f'\\({_area}\\)', regex=True) | ( + df['sub_area'] == _area) + + # Apply the filter + df = df[sub_area_filter] + print(f"After filtering: {len(df)} rows (removed {before_count - len(df)} rows)") + pbar.update(1) + + # Convert numeric columns to float for calculations + numeric_columns = ['total_vmt', 'cvmt', 'evmt'] + print("Converting numeric columns to float") + for col in numeric_columns: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + # Check for NaN values after conversion + nan_count = df[col].isna().sum() + if nan_count > 0: + print(f"Warning: {nan_count} rows have non-numeric {col} values") + pbar.update(1) + + # Format the data + print("Formatting data with provided format function") + df_formatted = format_func(df) + print(f"Formatted data shape: {df_formatted.shape}") + pbar.update(1) + + # Clean data + print("Cleaning data (filling NaN values and resetting index)") + df_formatted = df_formatted.fillna('').reset_index(drop=True) + pbar.update(1) + + # Group by relevant columns and sum VMT + print("Grouping data and calculating total VMT") + emfac_vmt = df_formatted.groupby('emfacId').agg({ + 'vehicle_class': 'first', + 'fuel': 'first', + 'model_year_group': 'first', + 'mappedFuel': 'first', + 'mappedClass': 'first', + 'total_vmt': 'sum' + }).reset_index() + print(f"After grouping: {len(emfac_vmt)} unique vehicle class/fuel/model year combinations") + pbar.update(1) + + # 
Calculate total VMT across all groups + total_vmt = emfac_vmt['total_vmt'].sum() + print(f"Total VMT: {total_vmt:,.0f}") + + # Calculate proportion of each group relative to total + print("Calculating VMT proportions") + emfac_vmt['vmt_proportion'] = emfac_vmt['total_vmt'] / total_vmt + + # Print the top 5 vehicle categories by VMT + print("Top 5 vehicle categories by VMT:") + top_5 = emfac_vmt.sort_values('total_vmt', ascending=False).head(5) + for _, row in top_5.iterrows(): + percent = row['vmt_proportion'] * 100 + print(f" {row['vehicle_class']}, {row['fuel']}, {row['model_year_group']}: " + f"{row['total_vmt']:,.0f} VMT ({percent:.2f}%)") + pbar.update(1) + + # Save the processed data + print(f"Writing VMT data to: {_emfac_vmt_output_file}") + emfac_vmt.to_csv(_emfac_vmt_output_file, index=False) + print("VMT data saved successfully") + + return emfac_vmt \ No newline at end of file diff --git a/src/main/python/emissions/_emfac_beam_ft_matching.py b/src/main/python/emissions/_emfac_beam_ft_matching.py new file mode 100644 index 00000000000..c9519396a05 --- /dev/null +++ b/src/main/python/emissions/_emfac_beam_ft_matching.py @@ -0,0 +1,689 @@ +import os.path +import sys + +import numpy as np +import pandas as pd +from tqdm import tqdm + +from utils.files_utils import sanitize_name + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +# Now use absolute import + + +def calculate_tour_summary_by_vehicle(payloads_raw): + """ + Calculate tour distances and generate vehicle-level VMT summary statistics. + + This function processes raw payload data to compute tour distances based on + sequential coordinates, then aggregates these distances to the vehicle level. + It calculates both absolute VMT values and proportional VMT shares. 
+ + Args: + payloads_raw (pandas.DataFrame): DataFrame containing payload records with + columns: 'tourId', 'sequenceRank', 'locationX', 'locationY', 'payloadId', + 'vehicleId', and 'payloadType'. + + Returns: + pandas.DataFrame: Vehicle-level summary with total VMT and VMT proportion. + The DataFrame is indexed by vehicleId with columns 'total_vmt' and + 'vmt_proportion'. + + Note: + Distances are calculated using Euclidean distance between consecutive + locations within each tour. + """ + # Sort data by tourId and sequenceRank + df = payloads_raw.sort_values(by=['tourId', 'sequenceRank']) + + # Create shifted columns to calculate distances between consecutive points + df['next_x'] = df.groupby('tourId')['locationX'].shift(-1) + df['next_y'] = df.groupby('tourId')['locationY'].shift(-1) + + # Calculate distances (only where next point exists) + # Create arrays from DataFrame columns for faster operations + x1 = df['locationX'].values + y1 = df['locationY'].values + x2 = df['next_x'].values + y2 = df['next_y'].values + mask = ~np.isnan(x2) + + # Calculate distances using NumPy operations + distances = np.zeros(len(df)) + distances[mask] = np.sqrt((x1[mask] - x2[mask]) ** 2 + (y1[mask] - y2[mask]) ** 2) + + # Assign back to DataFrame + df['segment_distance'] = distances + + # Sum up distances by tour + tour_distances = df.groupby('tourId')['segment_distance'].sum().to_dict() + total_distance = sum(tour_distances.values()) + tour_proportions = {tour_id: dist / total_distance for tour_id, dist in tour_distances.items()} + + # Create summary dataframe + payloads = payloads_raw[['tourId', 'payloadType']].copy() + payloads['payloadType'] = payloads['payloadType'].astype(str) + + # Group by tour and add distance metrics + summary = (payloads + .groupby('tourId')['payloadType'] + .agg('|'.join) + .reset_index()) + + # Add distance metrics + summary['total_vmt'] = summary['tourId'].map(tour_distances) + summary['vmt_proportion'] = summary['tourId'].map(tour_proportions) + + 
return summary + + +def find_best_match(veh_class, veh_fuel, alternatives_mapping, df): + """ + Find the best matching EMFAC vehicle record using a hierarchical matching strategy. + + This function implements a four-tier fallback approach to match BEAM vehicles + with EMFAC vehicles: + 1. Exact match: Same vehicle class AND fuel type + 2. Fuel-only match: Matching fuel type, any vehicle class + 3. Class-only match: Matching vehicle class, any fuel type + 4. Any-match: Random selection if no other matches are found + + The VMT-weighted sampling ensures that more common vehicle configurations + in the EMFAC dataset are more likely to be selected as matches. + + Args: + veh_class (str): BEAM vehicle class to match + veh_fuel (str): EMFAC fuel type to match + df (pandas.DataFrame): DataFrame of EMFAC vehicles with columns: + 'mappedClass', 'fuel', 'model_year_group', 'emfacId', and 'vmt_proportion' + + Returns: + dict: Matching result with the following keys: + 'match': The matched EMFAC vehicle record + 'type': Match type ('exact', 'fuel', 'class', or 'any') + 'composite_key': String key in format "{year},{class},{fuel}" + 'emfacId': EMFAC vehicle ID from the matched record + 'updates': Dictionary of fields that need to be updated in the original record + """ + # Create boolean arrays once + class_mask = df['mappedClass'].values == veh_class + approx_class_mask = df['mappedClass'].isin(alternatives_mapping[veh_class]) + fuel_mask = df['mappedFuel'].values == veh_fuel + approx_fuel_mask = df['mappedFuel'].isin(alternatives_mapping[veh_fuel]) + + # Combine masks with NumPy + full_match_mask = np.logical_and(class_mask, fuel_mask) + full_matches = df[full_match_mask] + match_type = "type" + if full_matches.empty: + full_matches = df[np.logical_and(class_mask, approx_fuel_mask)] + match_type = "exact-approx-fuel" + if full_matches.empty: + full_matches = df[np.logical_and(approx_class_mask, fuel_mask)] + match_type = "exact-approx-class" + if full_matches.empty: + 
full_matches = df[np.logical_and(approx_class_mask, approx_fuel_mask)] + match_type = "approx-class-fuel" + + if not full_matches.empty: + match = full_matches.sample(n=1, weights='vmt_proportion').iloc[0] + return { + 'match': match, + 'type': match_type, + 'composite_key': f"{match['model_year_group']},{match['mappedClass']},{match['mappedFuel']}", + 'emfacId': match['emfacId'], + 'updates': {} + } + + # Try fuel match only + fuel_matches = df[fuel_mask] + match_type = "fuel" + if fuel_matches.empty: + fuel_matches = df[approx_fuel_mask] + match_type = "approx-fuel" + + if not fuel_matches.empty: + match = fuel_matches.sample(n=1, weights='vmt_proportion').iloc[0] + return { + 'match': match, + 'type': match_type, + 'composite_key': f"{match['model_year_group']},{match['mappedClass']},{match['mappedFuel']}", + 'emfacId': match['emfacId'], + 'updates': {'mappedClass': match['mappedClass']} + } + + + + # Try class match only + class_matches = df[class_mask] + match_type = 'class' + if class_matches.empty: + class_matches = df[approx_class_mask] + match_type = "approx-class" + + if not class_matches.empty: + match = class_matches.sample(n=1, weights='vmt_proportion').iloc[0] + return { + 'match': match, + 'type': match_type, + 'composite_key': f"{match['model_year_group']},{match['mappedClass']},{match['mappedFuel']}", + 'emfacId': match['emfacId'], + 'updates': {'mappedFuel': match['mappedFuel']} + } + + # Last resort - any vehicle + match = df.sample(n=1, weights='vmt_proportion').iloc[0] + return { + 'match': match, + 'type': 'any', + 'composite_key': f"{match['model_year_group']},{match['mappedClass']},{match['mappedFuel']}", + 'emfacId': match['emfacId'], + 'updates': { + 'mappedClass': match['mappedClass'], + 'mappedFuel': match['mappedFuel'] + } + } + + +def analyze_vmt_distribution(beam_vmt_track, emfac_vmt_track): + """ + Analyze and compare VMT distributions between EMFAC and BEAM data after mapping. 
+ + This function takes the VMT tracking dictionaries from the mapping process + and generates detailed comparative analysis in two dimensions: + 1. By model year group and vehicle class + 2. By fuel type only + + For each comparison, the function: + - Creates DataFrames from the tracking dictionaries + - Aggregates VMT proportions by the relevant dimensions + - Calculates absolute and percentage differences + - Prints formatted tables of the most significant differences + - Reports summary statistics on the overall distribution match + + Args: + beam_vmt_track (dict): Dictionary with composite keys (year,class,fuel) mapping + to BEAM VMT proportions + emfac_vmt_track (dict): Dictionary with composite keys (year,class,fuel) mapping + to EMFAC VMT proportions + + Returns: + None: Results are printed to standard output + + Note: + The function expects composite keys in the format "year,class,fuel" and + will parse these to create structured DataFrames for comparison. + """ + print("\n=== VMT Distribution Analysis ===") + + # Create DataFrames from tracking dictionaries + emfac_rows = [] + beam_rows = [] + + for composite_key in set(list(beam_vmt_track.keys()) + list(emfac_vmt_track.keys())): + parts = composite_key.split(',') + if len(parts) == 3: + model_year_group, mapped_class, mapped_fuel = parts + + # Get VMT proportions (default to 0 if not present) + emfac_proportion = emfac_vmt_track.get(composite_key, 0) + beam_proportion = beam_vmt_track.get(composite_key, 0) + + # Add to respective lists + emfac_rows.append({ + 'model_year_group': model_year_group, + 'mappedClass': mapped_class, + 'mappedFuel': mapped_fuel, + 'vmt_proportion': emfac_proportion + }) + + beam_rows.append({ + 'model_year_group': model_year_group, + 'mappedClass': mapped_class, + 'mappedFuel': mapped_fuel, + 'vmt_share': beam_proportion + }) + + emfac_df = pd.DataFrame(emfac_rows) + beam_df = pd.DataFrame(beam_rows) + + # 1. 
Compare by model_year_group and mappedClass + print("\n--- VMT Comparison by Model Year and Vehicle Class ---") + + # Aggregate by year and class + emfac_by_year_class = emfac_df.groupby(['model_year_group', 'mappedClass'])['vmt_proportion'].sum().reset_index() + beam_by_year_class = beam_df.groupby(['model_year_group', 'mappedClass'])['vmt_share'].sum().reset_index() + + # Merge for comparison + year_class_comparison = pd.merge( + emfac_by_year_class, + beam_by_year_class, + on=['model_year_group', 'mappedClass'], + how='outer', + copy=False + ).fillna(0) + + # Calculate differences + year_class_comparison['difference'] = year_class_comparison['vmt_proportion'] - year_class_comparison['vmt_share'] + year_class_comparison['abs_difference'] = abs(year_class_comparison['difference']) + + # Sort by absolute difference and get top 10 + top_diff = year_class_comparison.sort_values('abs_difference', ascending=False).head(10) + + # Print table header + print("\nTop 10 VMT Proportion Differences by Year and Class:") + print("------------------------------------------------------------------") + print(f"{'Year':^10} | {'Class':^15} | {'EMFAC %':^10} | {'BEAM %':^10} | {'Diff %':^10}") + print("------------------------------------------------------------------") + + # Print each row with formatting + for _, row in top_diff.iterrows(): + print(f"{row['model_year_group']:^10} | " + f"{row['mappedClass']:^15} | " + f"{row['vmt_proportion'] * 100:^10.2f} | " + f"{row['vmt_share'] * 100:^10.2f} | " + f"{row['difference'] * 100:^10.2f}") + + # 2. 
Compare by fuel only + print("\n\n--- VMT Comparison by Fuel Type ---") + + # Aggregate by fuel + emfac_by_fuel = emfac_df.groupby(['mappedFuel'])['vmt_proportion'].sum().reset_index() + beam_by_fuel = beam_df.groupby(['mappedFuel'])['vmt_share'].sum().reset_index() + + # Merge for comparison + fuel_comparison = pd.merge( + emfac_by_fuel, + beam_by_fuel, + on=['mappedFuel'], + how='outer' + ).fillna(0) + + # Calculate differences + fuel_comparison['difference'] = fuel_comparison['vmt_proportion'] - fuel_comparison['vmt_share'] + fuel_comparison['abs_difference'] = abs(fuel_comparison['difference']) + + # Sort by absolute difference + fuel_comparison = fuel_comparison.sort_values('abs_difference', ascending=False) + + # Print table header + print("\nVMT Proportion Differences by Fuel Type:") + print("------------------------------------------------------------------") + print(f"{'Fuel Type':^15} | {'EMFAC %':^10} | {'BEAM %':^10} | {'Diff %':^10}") + print("------------------------------------------------------------------") + + # Print each row with formatting + for _, row in fuel_comparison.iterrows(): + print(f"{row['mappedFuel']:^15} | " + f"{row['vmt_proportion'] * 100:^10.2f} | " + f"{row['vmt_share'] * 100:^10.2f} | " + f"{row['difference'] * 100:^10.2f}") + + # Print summary stats + print("\n--- Summary Statistics ---") + print(f"Total model year/class combinations: {len(year_class_comparison)}") + print(f"Total fuel types: {len(fuel_comparison)}") + print(f"Max absolute difference by year/class: {year_class_comparison['abs_difference'].max() * 100:.2f}%") + print(f"Max absolute difference by fuel: {fuel_comparison['abs_difference'].max() * 100:.2f}%") + print(f"Average absolute difference by year/class: {year_class_comparison['abs_difference'].mean() * 100:.2f}%") + print(f"Average absolute difference by fuel: {fuel_comparison['abs_difference'].mean() * 100:.2f}%") + + +def emfac2freight_by_model_year_class_fuel(ft_emfac_vmt, carriers_raw, payloads_raw, 
vehicle_types_formatted, alternatives_mapping): + """ + Map EMFAC vehicle data to BEAM freight vehicles based on VMT proportions and vehicle attributes. + + This function performs a comprehensive matching process between EMFAC's emissions database + and BEAM's freight vehicle fleet. It uses VMT (vehicle miles traveled) proportions as a key + metric to ensure the distribution of vehicle types in the mapped result preserves the original + EMFAC emissions characteristics. + + The matching process: + 1. Calculates VMT for each BEAM freight vehicle from tour payload data + 2. Extracts VMT proportions from the EMFAC data by vehicle class, model year, and fuel type + 3. Creates composite tracking keys in the format "year,class,fuel" for comparison + 4. Matches each BEAM vehicle to an appropriate EMFAC vehicle using a hierarchical strategy + 5. Tracks VMT allocation to prevent overallocation of specific vehicle configurations + 6. Analyzes and reports on the resulting VMT distribution match quality + + Args: + ft_emfac_vmt (pandas.DataFrame): EMFAC VMT data containing columns 'mappedClass', + 'model_year_group', 'fuel', 'total_vmt', and 'emfacId' for freight fleet + carriers_raw (pandas.DataFrame): Raw carriers data with 'vehicleId' and 'vehicleTypeId' + payloads_raw (pandas.DataFrame): Raw payload data for calculating tour distances + vehicle_types_formatted (pandas.DataFrame): Pre-formatted vehicle types with 'vehicleTypeId', + 'mappedClass', and 'mappedFuel' + + Returns: + pandas.DataFrame: Mapping result with columns 'vehicleId', 'emfacId', 'vehicleTypeId', + 'mappedFuel', and 'mappedClass' + + Note: + The function prints progress information and performs VMT distribution analysis + after completing the mapping. 
+ """ + print("=== VMT-based Mapping Of BEAM Freight with EMFAC ===") + + # Step 1: Calculate euclidian VMT dataframe + tour_summary = calculate_tour_summary_by_vehicle(payloads_raw) + + # Step 2: Merge with vehicle types + vehicle_w_vmt = pd.merge( + tour_summary, + carriers_raw[['tourId', 'vehicleId', 'vehicleTypeId']], + on='tourId', + how='left', + copy=False + ).groupby('vehicleId').agg({ + 'total_vmt': 'sum', + 'vmt_proportion': 'sum', + 'vehicleTypeId': 'first' + }).reset_index() + vehicle_w_vmt = pd.merge(vehicle_w_vmt, vehicle_types_formatted, on='vehicleTypeId', how='left', copy=False) + vehicle_w_vmt = vehicle_w_vmt.sort_values('vmt_proportion', ascending=False).reset_index(drop=True) + total_beam_vmt = vehicle_w_vmt['total_vmt'].sum() + print(f"BEAM VMT with {len(vehicle_w_vmt)} rows and total vmt of {total_beam_vmt}.") + + # Step 3: Extract VMT proportion in EMFAC data + # Optimized code + emfac_w_vmt = ft_emfac_vmt.groupby(['mappedClass', 'model_year_group', 'mappedFuel', 'emfacId'])['total_vmt'].sum().reset_index() + emfac_w_vmt['vmt_proportion'] = emfac_w_vmt['total_vmt'] / emfac_w_vmt['total_vmt'].sum() + emfac_w_vmt = emfac_w_vmt.sort_values('vmt_proportion', ascending=False).reset_index(drop=True) + + total_emfac_vmt = emfac_w_vmt['total_vmt'].sum() + print(f"EMFAC VMT with {len(ft_emfac_vmt)} rows and total vmt of {total_emfac_vmt}.") + + # Step 4: Create composite key for tracking + emfac_w_vmt['composite_key'] = ( + emfac_w_vmt['model_year_group'].astype(str) + ',' + + emfac_w_vmt['mappedClass'] + ',' + + emfac_w_vmt['mappedFuel'] + ) + + key_vmt_series = emfac_w_vmt.groupby('composite_key')['total_vmt'].sum() + emfac_vmt_track = {k: v / total_emfac_vmt for k, v in key_vmt_series.items()} + + # Print top EMFAC VMT proportions + print("Top EMFAC VMT proportions:") + for key, prop in sorted(emfac_vmt_track.items(), key=lambda x: x[1], reverse=True)[:5]: + print(f" {key}: {prop:.4f}") + + # Step 5: Match BEAM vehicles to EMFAC vehicles with 
VMT-weighted sampling + # Initialize tracking + emfac_w_vmt_fallback = emfac_w_vmt.copy() + beam_vmt_track = {} + vehicle_w_vmt['assigned_class'] = "" # Track matching strategy + + # Create progress bar + total_vehicles = len(vehicle_w_vmt) + print(f"Matching {total_vehicles} vehicles to EMFAC records...") + + # Perform the matching + for i, row in tqdm(vehicle_w_vmt.iterrows(), total=total_vehicles, desc="Matching vehicles"): + veh_class = row['mappedClass'] + veh_fuel = row['mappedFuel'] + vmt_prop = row['vmt_proportion'] + + # Restore the full set if we've run out of options + if emfac_w_vmt.empty: + emfac_w_vmt = emfac_w_vmt_fallback.copy() + + # Find the best match + result = find_best_match( + veh_class, + veh_fuel, + alternatives_mapping, + emfac_w_vmt + ) + + if result['type'] == "any": + print(result) + + # Apply updates + vehicle_w_vmt.loc[i, "emfacId"] = result['emfacId'] + vehicle_w_vmt.loc[i, "assigned_class"] = result['type'] + + for key, value in result['updates'].items(): + vehicle_w_vmt.loc[i, key] = value + + # Track VMT allocation + composite_key = result['composite_key'] + beam_vmt_track[composite_key] = beam_vmt_track.get(composite_key, 0) + vmt_prop + + # Check if we've exhausted this composite key + if beam_vmt_track[composite_key] >= emfac_vmt_track[composite_key]: + print(f"Exhausted composite key {composite_key}: " + f"emfac={emfac_vmt_track[composite_key]:.4f}, " + f"beam={beam_vmt_track[composite_key]:.4f}") + + # Remove this composite key from available options + emfac_w_vmt = emfac_w_vmt[emfac_w_vmt["composite_key"] != composite_key] + + # Prepare the final result + result_columns = ["vehicleId", "emfacId", "vehicleTypeId", "mappedFuel", "mappedClass"] + result_df = vehicle_w_vmt[result_columns] + + # Analyze VMT distribution + analyze_vmt_distribution(beam_vmt_track, emfac_vmt_track) + + return result_df + + +def process_emfac_mappings(mapping_results, vehicle_types, vehicle_types_raw): + """ + Process EMFAC mapping results to create 
new vehicle types with optimized performance. + + Args: + mapping_results (pd.DataFrame): Results of EMFAC-to-BEAM mapping + vehicle_types (pd.DataFrame): Formatted vehicle types for matching + vehicle_types_raw (pd.DataFrame): Original vehicle types with all columns + + Returns: + tuple: (new_fleet, vehicle_type_map) + """ + from tqdm import tqdm + + print(f"Processing {len(mapping_results)} vehicle mappings...") + + # Initialize tracking variables + match_stats = {"fuel_and_class": 0, "fuel_only": 0, "class_only": 0, "none": 0} + + # For reproducibility + np.random.seed(42) + + # Step 1: Create lookup dataframes for each matching strategy + print("Building lookup tables for matching...") + match_keys = [] + + # Get all unique combinations from mapping_results + unique_fuel_class_combos = mapping_results[['mappedFuel', 'mappedClass']].drop_duplicates().reset_index(drop=True) + + # For each unique combination, find matching vehicle types + for _, combo in tqdm(unique_fuel_class_combos.iterrows(), + total=len(unique_fuel_class_combos), + desc="Building match tables"): + mapped_fuel = combo['mappedFuel'] + mapped_class = combo['mappedClass'] + + # Find matching indices for this combination + both_match = vehicle_types[(vehicle_types['mappedFuel'] == mapped_fuel) & + (vehicle_types['mappedClass'] == mapped_class)] + + fuel_match = vehicle_types[vehicle_types['mappedFuel'] == mapped_fuel] + class_match = vehicle_types[vehicle_types['mappedClass'] == mapped_class] + + # Store the match keys for later use + match_keys.append({ + 'mappedFuel': mapped_fuel, + 'mappedClass': mapped_class, + 'both_match': both_match['vehicleTypeId'].tolist() if not both_match.empty else [], + 'fuel_match': fuel_match['vehicleTypeId'].tolist() if not fuel_match.empty else [], + 'class_match': class_match['vehicleTypeId'].tolist() if not class_match.empty else [] + }) + + # Convert to dataframe for easier joining + match_keys_df = pd.DataFrame(match_keys) + + # Step 2: Join mapping results 
with match keys + print("Joining mapping results with match keys...") + merged_data = pd.merge( + mapping_results, + match_keys_df, + on=['mappedFuel', 'mappedClass'], + how='left' + ) + + # Step 3: Vectorized creation of new records + print("Applying matching strategy...") + # Prepare to collect results + vehicle_type_map = {} + new_rows = [] + + # Allocate arrays to determine match type for each record + match_type = np.full(len(merged_data), 'none', dtype=object) + match_vehicle_type_id = np.full(len(merged_data), None, dtype=object) + + # Apply matching strategy in order of preference: both > fuel > class > random + for i, row in tqdm(merged_data.iterrows(), + total=len(merged_data), + desc="Finding matches"): + if row['both_match']: + # Match on both fuel and class + match_vehicle_type_id[i] = np.random.choice(row['both_match']) + match_type[i] = 'fuel_and_class' + match_stats['fuel_and_class'] += 1 + elif row['fuel_match']: + # Fall back to matching on fuel only + match_vehicle_type_id[i] = np.random.choice(row['fuel_match']) + match_type[i] = 'fuel_only' + match_stats['fuel_only'] += 1 + elif row['class_match']: + # Fall back to matching on class only + match_vehicle_type_id[i] = np.random.choice(row['class_match']) + match_type[i] = 'class_only' + match_stats['class_only'] += 1 + else: + # Last resort: use any vehicle type + match_vehicle_type_id[i] = np.random.choice(vehicle_types['vehicleTypeId'].values) + match_type[i] = 'none' + match_stats['none'] += 1 + print(f" Warning: No match found for vehicleId={row['vehicleId']}, " + f"mappedClass={row['mappedClass']}, mappedFuel={row['mappedFuel']}") + + # Step 4: Create new vehicle types in a vectorized way + print("Creating new vehicle types...") + for i, row in tqdm(merged_data.iterrows(), + total=len(merged_data), + desc="Creating vehicle types"): + mapped_vehicle_id = row['vehicleId'] + mapped_emfac_id = row['emfacId'] + old_vehicle_type_id = match_vehicle_type_id[i] + + # Create new vehicle type ID that 
incorporates the EMFAC ID + old_vehicle_type_id_formatted = sanitize_name(old_vehicle_type_id).replace("_", "") + new_vehicle_type_id = f"{mapped_emfac_id}--{old_vehicle_type_id_formatted}" + + # Store the mapping for later carrier updates + vehicle_type_map[mapped_vehicle_id] = new_vehicle_type_id + + # Get the original record from vehicle_types_raw + original_record = vehicle_types_raw[vehicle_types_raw['vehicleTypeId'] == old_vehicle_type_id].iloc[ + 0].copy() + original_record['emfacId'] = mapped_emfac_id + original_record['vehicleTypeId'] = new_vehicle_type_id + new_rows.append(original_record) + + # Create dataframe from collected rows + print("Finalizing results...") + new_fleet = pd.DataFrame(new_rows) + + # Print summary statistics + print("\nMatch statistics:") + print(f" Exact matches (fuel and class): {match_stats['fuel_and_class']}") + print(f" Fuel-only matches: {match_stats['fuel_only']}") + print(f" Class-only matches: {match_stats['class_only']}") + print(f" No matches: {match_stats['none']}") + print(f" Total vehicles processed: {len(mapping_results)}") + print(f" Created {len(new_fleet)} new vehicle types") + + return new_fleet, vehicle_type_map + + +def generate_emfac_mapped_freight_fleet(emfac_vmt, freight_classes, work_dir, config, format_func): + """ + Create updated vehicle types and carriers files based on EMFAC mapping. + + This function performs a complete EMFAC to BEAM freight vehicle mapping workflow: + 1. Loads necessary input files (carriers, payloads, vehicle types) + 2. Formats vehicle types for EMFAC compatibility + 3. Performs the EMFAC-to-BEAM mapping process + 4. Creates new vehicle type records with EMFAC-specific IDs + 5. 
Updates carrier references to point to the new vehicle types + + Args: + emfac_vmt (pandas.DataFrame): EMFAC VMT data with emissions characteristics + freight_classes (list): List of vehicle classes to consider as freight vehicles + format_func (callable): Function to format vehicle types for EMFAC mapping + work_dir (str): Working directory containing input files + config (dict): Configuration dictionary with file paths and settings + + Returns: + tuple: (updated_carriers_df, updated_vehicle_types_df) + """ + # Prepare file paths + carriers_file = str(os.path.join(work_dir, config["beam"]["carriers_file"])) + payloads_file = str(os.path.join(work_dir, config["beam"]["payloads_file"])) + vehicle_types_file = str(os.path.join(work_dir, config["beam"]["ft_vehicle_types_file"])) + + # Load source data + print(f"Loading data from:\n {carriers_file}\n {vehicle_types_file}") + carriers_raw = pd.read_csv(carriers_file) + payloads_raw = pd.read_csv(payloads_file) + vehicle_types_raw = pd.read_csv(vehicle_types_file, dtype=str) + + # Get freight vehicle types with EMFAC mappings + vehicle_types = format_func( + vehicle_types_raw.loc[ + vehicle_types_raw['vehicleCategory'].isin(freight_classes), + ['vehicleTypeId', 'vehicleCategory', 'primaryFuelType', 'secondaryFuelType'] + ].copy() + ) + + # Filter EMFAC VMT data to freight classes + ft_emfac_vmt = emfac_vmt[['mappedClass', 'model_year_group', 'mappedFuel', 'total_vmt', 'emfacId']][ + emfac_vmt["mappedClass"].isin(freight_classes) + ].copy() + + fuel_class_alternative_mapping = config["mapping"]["fuel"]["alternatives"] | config["mapping"]["class"]["alternatives"] + + # Get mapping between EMFAC and freight vehicles + mapping_results = emfac2freight_by_model_year_class_fuel( + ft_emfac_vmt, + carriers_raw, + payloads_raw, + vehicle_types, + fuel_class_alternative_mapping + ) + + # Process mappings to create new vehicle types + new_fleet, vehicle_type_map = process_emfac_mappings( + mapping_results, + vehicle_types, + 
vehicle_types_raw + ) + + # Update the carriers file with new vehicle type IDs + new_carriers = carriers_raw.copy() + new_carriers["vehicleTypeId"] = new_carriers["vehicleId"].map( + pd.Series(vehicle_type_map)).fillna(new_carriers["vehicleTypeId"]) + + print(f"Updated {len(new_carriers)} carrier records") + + # Group by all columns except vehicleId, dropping the vehicleId column + new_vehicle_types = new_fleet[[col for col in new_fleet.columns if col != 'vehicleId']].drop_duplicates() + + return new_carriers, new_vehicle_types diff --git a/src/main/python/emissions/_emfac_beam_pax_mapping.py b/src/main/python/emissions/_emfac_beam_pax_mapping.py new file mode 100644 index 00000000000..883cc248c5b --- /dev/null +++ b/src/main/python/emissions/_emfac_beam_pax_mapping.py @@ -0,0 +1,686 @@ +import os +import re +import sys + +import numpy as np +import pandas as pd +from tqdm import tqdm + +from utils.files_utils import sanitize_name + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + + +def parse_sample_probability_string(prob_string): + """ + Parse a sample probability string to extract income and ridehail probabilities. 
+ + The expected format is "income|:; ridehail|" + For example: "income|25-50:0.250000; ridehail|0.200000" + + Args: + prob_string (str): The probability string to parse + + Returns: + tuple: A tuple containing: + - income_bin (str or None): The income bin/range (e.g., '25-50') + - income_prob (float or None): The probability associated with this income bin + - ridehail_prob (float or None): The ridehail probability + """ + # Early return for empty strings or NaN values + if pd.isna(prob_string) or prob_string == "": + return None, None, None + + # Remove spaces and convert to lowercase in one step + cleaned = prob_string.replace(" ", "").lower() + + # Use regex for faster parsing with compile once pattern + income_match = re.search(r"income\|([^:]+):([0-9.]+)", cleaned) + ridehail_match = re.search(r"ridehail\|([^:]+):([0-9.]+)", cleaned) + + # Extract values from matches + income_bin = income_match.group(1) if income_match else None + income_prob = float(income_match.group(2)) if income_match else None + ridehail_bin = ridehail_match.group(1) if ridehail_match else None + ridehail_prob = float(ridehail_match.group(2)) if ridehail_match else None + + return income_bin, income_prob, ridehail_bin, ridehail_prob + + +def create_sample_probability_string(income_bin, income_prob, ridehail_bin, ridehail_prob): + """ + Convert income bin, income probability, and ridehail probability back to a sample probability string. + + This function is the inverse of parse_sample_probability_string. 
+ The resulting string will be in the format: "income|:; ridehail|" + + Args: + income_bin (str or None): Income bin/range (e.g., '25-50', '50-75') + income_prob (float or None): Income probability value + ridehail_prob (float or None): Ridehail probability value + + Returns: + str: Formatted sample probability string + Example: "income|25-50:0.250000; ridehail|0.200000" + Returns empty string if all inputs are None + """ + # Quick return for empty data + if income_bin is None and income_prob is None and ridehail_bin is None and ridehail_prob is None: + return "" + + # Pre-allocate list with appropriate size to avoid resizing + parts = [] + + # Build parts directly + if income_bin is not None and income_prob is not None: + parts.append(f"income|{income_bin}:{income_prob:.6f}") + + if ridehail_bin is not None and ridehail_prob is not None: + parts.append(f"ridehail|{ridehail_bin}:{ridehail_prob:.6f}") + + # Use faster string joining + return "; ".join(parts) + + +def process_vehicle_types_probabilities_by_vehicle_category_and_income_group(vehicle_types): + """ + Process vehicle types data by extracting and normalizing probability distributions. + + This function: + 1. Extracts income bins, income probabilities, and ridehail probabilities from the + sampleProbabilityString column + 2. Normalizes probabilities by vehicle category (ensures sum equals 1 for each category) + 3. Normalizes probabilities by income bin (ensures sum equals 1 for each income bin) + 4. 
Normalizes ridehail probabilities (ensures sum equals 1) + + Args: + vehicle_types (pd.DataFrame): DataFrame containing vehicle types data with columns: + - vehicleCategory: Category of vehicle + - sampleProbabilityString: String containing probability information + - sampleProbabilityWithinCategory: Probability of the vehicle within its category + + Returns: + pd.DataFrame: Processed DataFrame with additional columns: + - income_bin: Extracted income bin + - income_prop: Normalized income probability + - ridehail_prop: Normalized ridehail probability + """ + # Create a copy of the dataframe to avoid modifying the original + df = vehicle_types.copy() + + # Add new columns directly using vectorized operations + # Apply parse_sample_probability_string to all rows at once + parsed_data = df['sampleProbabilityString'].apply(parse_sample_probability_string) + df['income_bin'] = parsed_data.apply(lambda x: x[0]) + df['income_prop'] = parsed_data.apply(lambda x: x[1]) + df['ridehail_bin'] = parsed_data.apply(lambda x: x[2]) + df['ridehail_prop'] = parsed_data.apply(lambda x: x[3]) + + df['sampleProbabilityWithinCategory'] = pd.to_numeric(df['sampleProbabilityWithinCategory'], errors='coerce') + + # Normalize probabilities by category using groupby operations + # This is faster than iterating through unique categories + category_groups = df.groupby('vehicleCategory') + df['sampleProbabilityWithinCategory'] = df.apply( + lambda row: row['sampleProbabilityWithinCategory'] / + category_groups.get_group(row['vehicleCategory'])['sampleProbabilityWithinCategory'].sum() + if category_groups.get_group(row['vehicleCategory'])['sampleProbabilityWithinCategory'].sum() > 0 + else row['sampleProbabilityWithinCategory'], + axis=1 + ) + + # Normalize income probabilities by category and income bin + for category in df['vehicleCategory'].unique(): + category_df = df[df['vehicleCategory'] == category] + + for income_bin in category_df['income_bin'].dropna().unique(): + mask = 
(df['vehicleCategory'] == category) & (df['income_bin'] == income_bin) + prob_sum = df.loc[mask, 'income_prop'].sum() + + if prob_sum > 0: + df.loc[mask, 'income_prop'] = df.loc[mask, 'income_prop'] / prob_sum + + # Normalize ridehail probabilities - can be done with vectorized operations + for category in df['vehicleCategory'].unique(): + category_df = df[df['vehicleCategory'] == category] + + for ridehail_bin in category_df['ridehail_bin'].dropna().unique(): + mask = (df['vehicleCategory'] == category) & (df['ridehail_bin'] == ridehail_bin) + prob_sum = df.loc[mask, 'ridehail_prop'].sum() + + if prob_sum > 0: + df.loc[mask, 'ridehail_prop'] = df.loc[mask, 'ridehail_prop'] / prob_sum + + return df + + +def emfac2passenger_with_atlas_crosswalk(vehicle_types, atlas_emfac_fleet, work_dir, config): + """ + Distribute total_vmt and population values evenly across different vehicle typeIds + that share the same emfacId and bodytype combination. + + Args: + vehicle_types (pd.DataFrame): DataFrame with vehicleTypeId and bodytype + atlas_emfac_fleet (pd.DataFrame): DataFrame with emfacId, bodytype, total_vmt, population + + Returns: + pd.DataFrame: DataFrame with distributed vmt and population values + """ + routee_beam_atlas_map = pd.read_csv(str(os.path.join(work_dir, config["mapping"]["atlas"]["routee"])), dtype=str) + vehicles = pd.read_csv(str(os.path.join(work_dir, config["beam"]["pax_vehicles_file"])), dtype=str) + beam_fleet = vehicles["vehicleTypeId"].unique() + filtered_vehicle_types = vehicle_types[vehicle_types["vehicleTypeId"].isin(beam_fleet)].copy() + beam_fleet = filtered_vehicle_types["vehicleTypeId"].unique() + vehicles_filtered = vehicles[vehicles["vehicleTypeId"].isin(beam_fleet)].copy() + + # Step 1: Merge vehicle types with body types + unique_vehicle_bodytype_map = routee_beam_atlas_map.groupby("vehicleTypeId")["bodytype"].first().to_dict() + vehicle_types_with_body_types = filtered_vehicle_types.copy() + vehicle_types_with_body_types["bodytype"] = 
filtered_vehicle_types["vehicleTypeId"].map(unique_vehicle_bodytype_map).str.lower().str.capitalize() + + # Step 2: Merge with EMFAC fleet data + vehicles_atlas_emfac = pd.merge( + left=vehicle_types_with_body_types, + right=atlas_emfac_fleet, + left_on=['bodytype', 'mappedFuel', 'mappedClass'], + right_on=['bodytype', 'mappedFuel', 'mappedClass'], + how='left' + ) + + # Step 3: Count matching vehicle types for each emfacId + emfac_counts = vehicles_atlas_emfac.groupby('emfacId').size().to_dict() + + # Step 4: Create a mapping of emfacId to total_vmt and population + emfac_values = {row['emfacId']: (row['total_vmt'], row['population']) + for _, row in atlas_emfac_fleet.iterrows()} + + # Step 5: Apply the distribution in one vectorized operation + vehicles_atlas_emfac['total_vmt'] = vehicles_atlas_emfac.apply( + lambda row: emfac_values[row['emfacId']][0] / emfac_counts[row['emfacId']] + if row['emfacId'] in emfac_counts and emfac_counts[row['emfacId']] > 0 else 0, + axis=1 + ) + vehicles_atlas_emfac['population'] = vehicles_atlas_emfac.apply( + lambda row: emfac_values[row['emfacId']][1] / emfac_counts[row['emfacId']] + if row['emfacId'] in emfac_counts and emfac_counts[row['emfacId']] > 0 else 0, + axis=1 + ) + + # Get only valid rows + results = vehicles_atlas_emfac.reset_index(drop=True) + + # Calculate proportions + results['vmt_proportion'] = results.groupby('vehicleCategory')['total_vmt'].transform( + lambda x: x / x.sum() if x.sum() > 0 else 0 + ) + results['population_proportion'] = results.groupby('vehicleCategory')['population'].transform( + lambda x: x / x.sum() if x.sum() > 0 else 0 + ) + + total_vehicle_types = len(vehicles_filtered) + vehicle_type_counts = vehicles_filtered.groupby('vehicleTypeId').size().reset_index(name='count') + vehicle_type_counts["proportion"] = vehicle_type_counts["count"] / total_vehicle_types + vehicle_type_proportions = dict(zip(vehicle_type_counts['vehicleTypeId'], vehicle_type_counts['proportion'])) + + # Apply 
distribution - fixed the dictionary lookup + results['distribution'] = results['vehicleTypeId'].map(vehicle_type_proportions) * results['vmt_proportion'] + total = results['distribution'].sum() + results['distribution'] = results['distribution'] / total if total > 0 else 0 + + results['sampleProbabilityWithinCategory'] = results['distribution'] + results['income_bin'] = "all" + results['income_prop'] = results['distribution'] + results['ridehail_bin'] = "all" + results['ridehail_prop'] = results['distribution'] + + results['sampleProbabilityString'] = results.apply( + lambda row: create_sample_probability_string( + row['income_bin'], row['income_prop'], row['ridehail_bin'], row['ridehail_prop'] + ), + axis=1 + ) + + return results + + +def emfac2passenger_by_category_income(vehicle_types, car_emfac_fleet, config): + """ + Merge passenger vehicle types with EMFAC vmt data. + + This function combines vehicle type data with EMFAC vmt distribution data, + adjusting and normalizing probabilities to maintain consistent distributions from + both input sources. 
+ + Args: + vehicle_types (pd.DataFrame): DataFrame of vehicle types with columns: + - vehicleTypeId: ID of vehicle type + - mappedClass: Vehicle class category in BEAM + - mappedFuel: Fuel type compatible with EMFAC categories + - income_bin: Income bin/range (e.g., '25-50') + - income_prop: Probability for this income group + - ridehail_prop: Ridehail probability + - sampleProbabilityWithinCategory: Probability within vehicle category + + car_emfac_fleet (pd.DataFrame): DataFrame of EMFAC vehicle population and vmt with columns: + - emfacId: ID of EMFAC vehicle type + - mappedClass: Vehicle class category in BEAM + - vehicle_class: Specific vehicle class (e.g., 'LD1', 'LD2') + - mappedFuel: Fuel type in EMFAC + - vmt_proportion: Proportion in the total vehicle vmt + + Returns: + pd.DataFrame: Merged dataframe with new columns: + - newId: Combined ID (emfacId--vehicleTypeId) + - vmt_normalized: Normalized vmt proportion + - newProportionIncome: Recalculated income proportion + - newProportionRidehail: Recalculated ridehail proportion + - sampleProbabilityString: Updated probability string + """ + df_merged = pd.merge( + left=vehicle_types, + right=car_emfac_fleet, + left_on=['mappedClass', 'mappedFuel'], + right_on=['mappedClass', 'mappedFuel'], + how='left' + ) + + # Calculate vehicle class probabilities given fuel type using groupby + vehicle_class_probs = {} + # Group by mappedClass and fuel to get distribution by vehicle_class + grouped = car_emfac_fleet.groupby(['mappedClass', 'mappedFuel']) + + for group_key, group_df in grouped: + mapped_class, mapped_fuel = group_key + if (mapped_class, mapped_fuel) not in vehicle_class_probs: + vehicle_class_probs[(mapped_class, mapped_fuel)] = {} + + # Calculate normalized probabilities for each vehicle class within the group + total_vmt = group_df['total_vmt'].sum() + for _, row in group_df.iterrows(): + vehicle_class_probs[(mapped_class, mapped_fuel)][row['vehicle_class']] = row['total_vmt'] / total_vmt + + # Apply the 
conditional probability formula to calculate new proportions + # Using a vectorized approach where possible + def get_vehicle_class_prob(row): + key = (row['mappedClass'], row['mappedFuel']) + vehicle_class = row['vehicle_class'] + return vehicle_class_probs.get(key, {}).get(vehicle_class, 0) + + # Calculate vehicle class probabilities for each row + df_merged['vehicle_class_prob'] = df_merged.apply(get_vehicle_class_prob, axis=1) + + # Calculate new proportions + if config["mapping"]["fleet"]["ignore_beam_passenger_distribution"]: + df_merged['sampleProbabilityWithinCategory'] = df_merged['total_vmt'] + df_merged['income_prop'] = df_merged['vehicle_class_prob'] + df_merged['ridehail_prop'] = df_merged['vehicle_class_prob'] + else: + if 'population' in df_merged.columns and df_merged['population'].sum() > 0: + total = df_merged['population'].sum() + df_merged["population_proportion"] = df_merged['population'] / total + df_merged['sampleProbabilityWithinCategory'] = df_merged['population_proportion'] * df_merged[ + 'vmt_normalized'] + df_merged['income_prop'] = df_merged['population_proportion'] * df_merged['vehicle_class_prob'] + df_merged['ridehail_prop'] = df_merged['population_proportion'] * df_merged['vehicle_class_prob'] + else: + df_merged['sampleProbabilityWithinCategory'] = df_merged['sampleProbabilityWithinCategory'] * df_merged[ + 'vmt_normalized'] + df_merged['income_prop'] = df_merged['income_prop'] * df_merged['vehicle_class_prob'] + df_merged['ridehail_prop'] = df_merged['ridehail_prop'] * df_merged['vehicle_class_prob'] + + # Normalize by income group using groupby + for income_group in df_merged['income_bin'].dropna().unique(): + mask = df_merged['income_bin'] == income_group + + # Normalize income proportions + income_sum = df_merged.loc[mask, 'income_prop'].sum() + if income_sum > 0: + df_merged.loc[mask, 'income_prop'] = df_merged.loc[mask, 'income_prop'] / income_sum + + for ridehail_group in df_merged['ridehail_bin'].dropna().unique(): + mask = 
df_merged['ridehail_bin'] == ridehail_group + + # Normalize ridehail proportions + ridehail_sum = df_merged.loc[mask, 'ridehail_prop'].sum() + if ridehail_sum > 0: + df_merged.loc[mask, 'ridehail_prop'] = df_merged.loc[mask, 'ridehail_prop'] / ridehail_sum + + # Recreate the sample probability string with updated values + # Note: The original code uses 'new_income_prob' but this variable isn't defined or created in the function + # Using 'income_prop' instead based on context + df_merged['sampleProbabilityString'] = df_merged.apply( + lambda row: create_sample_probability_string( + row['income_bin'], + row['income_prop'], # Changed from 'new_income_prob' which doesn't exist + row['ridehail_bin'], + row['ridehail_prop'] + ), + axis=1 + ) + + return df_merged + + +def create_atlas_emfac_crosswalk(car_emfac_fleet, work_dir, config): + """ + Create a crosswalk between EMFAC classes and bodytypes. + + Args: + car_emfac_fleet (pd.DataFrame): DataFrame containing EMFAC fleet + work_dir (str): Working directory path containing input files + config (dict): Configuration dictionary with data file paths and settings + + Returns: + pd.DataFrame: car_emfac with added bodytype and bodytype_prop columns, + and updated emfacId column combined with bodytype + """ + emfac_bodytype_df = pd.read_csv(os.path.join(work_dir, config["mapping"]["atlas"]["emfac"])) + result_rows = [] + for _, emfac_row in car_emfac_fleet.iterrows(): + emfac_class = emfac_row['vehicle_class'] + body_type_matches = [] + + for _, body_type_row in emfac_bodytype_df.iterrows(): + body_type = body_type_row['bodytype'].lower().capitalize() + if emfac_class in body_type_row.index and body_type_row[emfac_class] > 0: + proportion = body_type_row[emfac_class] + body_type_matches.append((body_type, proportion)) + + for body_type, proportion in body_type_matches: + new_row = emfac_row.copy() + new_row["bodytype"] = body_type + new_row["bodytype_prop"] = proportion + result_rows.append(new_row) + + # Create DataFrame 
from results + result_df = pd.DataFrame(result_rows) + result_df["pop"] = result_df["population"] * result_df["bodytype_prop"] + result_df["vmt"] = result_df["total_vmt"] * result_df["bodytype_prop"] + result_df["population"] = result_df["pop"] + result_df["total_vmt"] = result_df["vmt"] + total_population = result_df['population'].sum() + total_vmt = result_df['total_vmt'].sum() + if total_population > 0 and total_vmt > 0: + result_df["population_proportion"] = result_df['population'] / total_population + result_df["vmt_proportion"] = result_df['total_vmt'] / total_vmt + + car_emfac_fleet_with_bodytype = result_df[car_emfac_fleet.columns.tolist() + ["bodytype"]].copy() + return car_emfac_fleet_with_bodytype + + +def generate_emfac_mapped_passenger_vehicle_types(emfac_fleet, car_class, bike_class, transit_class, filter_out_classes, work_dir, config, format_func): + """ + Generate a passenger vehicle types with EMFAC mappings for different vehicle classes. + + This function processes vehicle types data and maps it to EMFAC vmt data for + cars, bikes, and transit vehicles. It creates a combined dataset that preserves the + distributions from both sources while mapping vehicle types to appropriate EMFAC categories. 
+ + Args: + emfac_fleet (pd.DataFrame): EMFAC vmt and population data with vehicle classes and proportions + car_class (str): Identifier for car vehicle classes + bike_class (str): Identifier for bike vehicle classes + transit_class (str): Identifier for transit vehicle classes + filter_out_classes (list): classes to filter out, specifically freight classes + format_func (function): Function to format vehicle types data + work_dir (str): Working directory path + config (dict): Configuration dictionary with keys: + - beam.pax_vehicle_types_file: Path to vehicle types file + - mappedFuel: Fuel configuration parameters + + Returns: + pd.DataFrame: Combined and mapped passenger vehicle types with EMFAC IDs + """ + # Load vehicle types file + vehicle_types_file = os.path.join(work_dir, f"{config['beam']['pax_vehicle_types_file']}") + + # Read and filter vehicle types + vehicle_types_raw = pd.read_csv(vehicle_types_file, dtype=str) + vehicle_types_filtered = vehicle_types_raw[~vehicle_types_raw["vehicleCategory"].isin(filter_out_classes)] + + # Create masks for filtering + car_bike_mask = vehicle_types_filtered['vehicleCategory'].isin([car_class, bike_class]) + + # Fix the bus mask - original had a logical error using & instead of bitwise & + bus_mask = (vehicle_types_filtered['vehicleCategory'] == transit_class) & \ + (vehicle_types_filtered['vehicleTypeId'].str.lower().str.contains('bus')) + + # Format the filtered vehicle types + filtered_vehicle_types = vehicle_types_filtered.loc[car_bike_mask | bus_mask].copy() + vehicle_types = format_func(filtered_vehicle_types) + + # ################################################################################################### + # CAR + # ################################################################################################### + + # Process car data + car_emfac_fleet = emfac_fleet[emfac_fleet["mappedClass"].isin([car_class])].copy() + + # Process car data with probabilities + car_vehicle_types = 
vehicle_types[vehicle_types['mappedClass'].isin([car_class])].copy() + processed_car_types = process_vehicle_types_probabilities_by_vehicle_category_and_income_group(car_vehicle_types) + + if config["mapping"]["atlas"]["enable_atlas_emfac_crosswalk"]: + atlas_emfac_fleet = create_atlas_emfac_crosswalk(car_emfac_fleet, work_dir, config) + car_beam_emfac = emfac2passenger_with_atlas_crosswalk(processed_car_types, atlas_emfac_fleet, work_dir, config) + else: + car_beam_emfac = emfac2passenger_by_category_income(processed_car_types, car_emfac_fleet, config) + + # Select only necessary columns from the result + car_beam_emfac["oldVehicleTypeId"] = car_beam_emfac["vehicleTypeId"] + car_beam_emfac['vehicleTypeId'] = car_beam_emfac.apply( + lambda row: str( + row["emfacId"]) + "--" + + sanitize_name(row["bodytype"]).replace("_", "") + "--" + + sanitize_name(row["oldVehicleTypeId"]).replace("_", ""), axis=1 + ) + car_beam_emfac = car_beam_emfac[vehicle_types_filtered.columns.tolist() + ["emfacId", "oldVehicleTypeId"]] + + # ################################################################################################### + # BIKE + # ################################################################################################### + + # Process bike data + bike_emfac = emfac_fleet[emfac_fleet["mappedClass"].isin([bike_class])].copy() + bike_pop_sum = bike_emfac['total_vmt'].sum() + if bike_pop_sum > 0: + bike_emfac['vmt_normalized'] = bike_emfac['total_vmt'] / bike_pop_sum + else: + bike_emfac['vmt_normalized'] = 0.0 # Default value if no range + + # Merge bike data + bike_beam_emfac = pd.merge( + left=vehicle_types[vehicle_types['mappedClass'].isin([bike_class])], + right=bike_emfac, + left_on=['mappedClass', 'mappedFuel'], + right_on=['mappedClass', 'mappedFuel'], + how='outer' + ) + + # Calculate new proportion for bikes + # The original had a possible bug with 'prob_category' - changed to 'sampleProbabilityWithinCategory' + 
bike_beam_emfac['sampleProbabilityWithinCategory'] = bike_beam_emfac.apply( + lambda row: 1 if pd.isna(row['sampleProbabilityWithinCategory']) or + row['sampleProbabilityWithinCategory'] == '' + else float(row['sampleProbabilityWithinCategory']) * row['vmt_normalized'], + axis=1 + ) + + # Select bike columns + bike_beam_emfac = bike_beam_emfac[vehicle_types_filtered.columns.tolist() + ["emfacId"]] + bike_beam_emfac["oldVehicleTypeId"] = bike_beam_emfac["vehicleTypeId"] + bike_beam_emfac['vehicleTypeId'] = bike_beam_emfac.apply( + lambda row: str(row["emfacId"]) + "--" + sanitize_name(row["oldVehicleTypeId"]).replace("_", "") + , axis=1) + + # ################################################################################################### + # BUS + # ################################################################################################### + + # Process bus data + bus_emfac = emfac_fleet[emfac_fleet["mappedClass"] == transit_class].copy() + bus_pop_sum = bus_emfac['total_vmt'].sum() + if bus_pop_sum > 0: + bus_emfac['vmt_normalized'] = bus_emfac['total_vmt'] / bus_pop_sum + else: + bus_emfac['vmt_normalized'] = 0.0 # Default value if no range + + # Bus mask for filter - corrected syntax for filtering + bus_types_mask = (vehicle_types['mappedClass'] == transit_class) & \ + (vehicle_types['vehicleTypeId'].str.lower().str.contains('bus')) + + # Merge bus data - using the corrected mask + bus_beam_emfac_merged = pd.merge( + left=vehicle_types[bus_types_mask], + right=bus_emfac, + on=['mappedClass', 'mappedFuel'], + how='outer' + ) + + bus_beam_emfac = bus_beam_emfac_merged.groupby('vehicleTypeId').apply( + lambda x: x.sample(n=1, weights='vmt_normalized', replace=True) if len(x) > 0 else x + ).reset_index(drop=True) + + # Select bus columns + bus_beam_emfac = bus_beam_emfac[vehicle_types_filtered.columns.tolist() + ["emfacId"]] + bus_beam_emfac["oldVehicleTypeId"] = bus_beam_emfac["vehicleTypeId"] + + # Combine all vehicle types + result = 
pd.concat([car_beam_emfac, bike_beam_emfac, bus_beam_emfac], ignore_index=True) + processed_ids = result["oldVehicleTypeId"].unique() + vehicle_types_others = vehicle_types_filtered.loc[~(car_bike_mask | bus_mask)].copy() + vehicle_types_others = vehicle_types_others[~vehicle_types_others["vehicleTypeId"].isin(processed_ids)] + return result, vehicle_types_others + + +def generate_fleet_from_vehicle_types(mapped_vehicle_types, car_class, bike_class, work_dir, config): + """ + Update vehicle.csv file by sampling from new vehicle types based on original vehicleTypeId. + This highly optimized function uses vectorized operations and eliminates loops where possible. + + Args: + mapped_vehicle_types (pd.DataFrame): DataFrame containing mapped vehicle types + car_class (str): Identifier for car vehicle class + bike_class (str): Identifier for bike vehicle class + work_dir (str): Working directory path + config (dict): Configuration dictionary with beam.pax_vehicles_file key + + Returns: + pd.DataFrame: Updated vehicles DataFrame with new vehicleTypeIds and stateOfCharge values + """ + # Read the vehicle.csv file + vehicles_file_path = os.path.join(work_dir, config["beam"]["pax_vehicles_file"]) + vehicles_df = pd.read_csv(vehicles_file_path) + + # Create new columns in advance + vehicles_df['oldVehicleTypeId'] = vehicles_df['vehicleTypeId'] + vehicles_df['stateOfCharge'] = "" + + # Filter vehicle types to only cars and bikes (do this once) + car_bike_mask = mapped_vehicle_types['vehicleCategory'].isin([car_class, bike_class]) + filtered_vehicle_types = mapped_vehicle_types.loc[car_bike_mask].copy() + + # Ensure sampleProbabilityWithinCategory is numeric (do this once) + filtered_vehicle_types['sampleProbabilityWithinCategory'] = pd.to_numeric( + filtered_vehicle_types['sampleProbabilityWithinCategory'], errors='coerce' + ).fillna(0) + + # Pre-process and organize vehicle types by category + vehicle_types_by_category = { + car_class: 
filtered_vehicle_types[filtered_vehicle_types['vehicleCategory'] == car_class], + bike_class: filtered_vehicle_types[filtered_vehicle_types['vehicleCategory'] == bike_class] + } + + # Pre-process and organize vehicle types by original type (if oldVehicleTypeId exists) + vehicle_types_by_original = {} + if 'oldVehicleTypeId' in filtered_vehicle_types.columns: + for orig_id in filtered_vehicle_types['oldVehicleTypeId'].unique(): + if pd.notna(orig_id) and orig_id: + matches = filtered_vehicle_types[filtered_vehicle_types['oldVehicleTypeId'] == orig_id] + if len(matches) > 0: + weights = matches['sampleProbabilityWithinCategory'].values + weights_sum = np.sum(weights) + if weights_sum > 0: + weights = weights / weights_sum + vehicle_types_by_original[str(orig_id)] = (matches, weights) + + # Create lookup table of original type IDs to categories + unique_vehicle_types = vehicles_df['vehicleTypeId'].unique() + vehicle_categories = {} + for vtype in unique_vehicle_types: + vtype_str = str(vtype) + if 'BIKE' in vtype_str.upper(): + vehicle_categories[vtype_str] = bike_class + else: + vehicle_categories[vtype_str] = car_class + + # Process in larger batches for better performance + batch_size = 10000 # Increased batch size + total_vehicles = len(vehicles_df) + num_batches = (total_vehicles + batch_size - 1) // batch_size + + with tqdm(total=total_vehicles, desc="Processing vehicles") as pbar: + for batch_idx in range(num_batches): + start_idx = batch_idx * batch_size + end_idx = min(start_idx + batch_size, total_vehicles) + batch = vehicles_df.iloc[start_idx:end_idx] + + # Create arrays to hold new values + new_vehicle_type_ids = [] + states_of_charge = [] + + # Group vehicles by original type to process in chunks + for original_type_id, group_indices in batch.groupby('vehicleTypeId').groups.items(): + original_type_id_str = str(original_type_id) + group_size = len(group_indices) + + # Get matching vehicle types for this original type + if original_type_id_str in 
vehicle_types_by_original: + matches, weights = vehicle_types_by_original[original_type_id_str] + else: + # If no direct match, use vehicle category + category = vehicle_categories[original_type_id_str] + matches = vehicle_types_by_category[category] + weights = matches['sampleProbabilityWithinCategory'].values + weights_sum = np.sum(weights) + if weights_sum > 0: + weights = weights / weights_sum + # Cache for future use + vehicle_types_by_original[original_type_id_str] = (matches, weights) + + # Sample vehicle types for the entire group at once + if len(matches) > 0: + if np.sum(weights) > 0: + sampled_indices = np.random.choice( + len(matches), size=group_size, p=weights, replace=True + ) + else: + sampled_indices = np.random.randint(0, len(matches), size=group_size) + + # Get sampled vehicle types and fuel types + sampled_vehicles = matches.iloc[sampled_indices] + sampled_types = sampled_vehicles['vehicleTypeId'].values + is_electric = sampled_vehicles['primaryFuelType'].str.lower().str.contains('electricity', + na=False).values + + # Assign to ordered arrays + for i, idx in enumerate(group_indices): + # No need to calculate idx_in_batch since we're using arrays and appending + new_vehicle_type_ids.append(sampled_types[i]) + states_of_charge.append('1' if is_electric[i] else '') + else: + # Fallback if no matches (should rarely happen) + for _ in range(group_size): + new_vehicle_type_ids.append(original_type_id) + states_of_charge.append('') + + # Bulk update the batch + vehicles_df.loc[start_idx:end_idx - 1, 'vehicleTypeId'] = new_vehicle_type_ids + vehicles_df.loc[start_idx:end_idx - 1, 'stateOfCharge'] = states_of_charge + + pbar.update(end_idx - start_idx) + + return vehicles_df \ No newline at end of file diff --git a/src/main/python/emissions/emfac_data_processing.py b/src/main/python/emissions/emfac_data_processing.py deleted file mode 100644 index c259e31d7b6..00000000000 --- a/src/main/python/emissions/emfac_data_processing.py +++ /dev/null @@ -1,83 
+0,0 @@ -from emissions_utils import * -import geopandas as gpd -import matplotlib.pyplot as plt - -emfac_regions = ["SF"] -model_dir = os.path.abspath(os.path.expanduser('~/Workspace/Models/emfac/2018')) -work_dir = os.path.abspath(os.path.expanduser("~/Workspace/Simulation/sfbay")) -run_dir = work_dir + "/beam-freight/2024-01-23/Baseline" - - -# ## Population ## -statewide_pop_file = model_dir + '/Default_Statewide_2018_Annual_fleet_data_population_20240311153419.csv' - -# Load the dataset from the uploaded CSV file -emfac_pop = prepare_emfac_emissions_for_mapping(statewide_pop_file, emfac_regions) - -# ## TRIPS ## -statewide_trips_filename = model_dir + '/Default_Statewide_2018_Annual_fleet_data_trips_20240311153419.csv' - -########################################### -# ## EMISSIONS RATES ## -# pd.set_option('display.max_columns', 20) -########################################### -regional_emfac_data_file = model_dir + '/imputed_MTC_emission_rate_agg_NH3_added.csv' - - -########################################### -# ## FRISM PLANS ## -########################################### -# freight_carriers = pd.read_csv("~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/freight-carriers.csv") -# freight_payloads = pd.read_csv("~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/freight-payload-plans.csv") -# freight_tours = pd.read_csv("~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/freight-tours.csv") -# -# # Plot the histogram for the 'Values' column -# freight_payloads['sequenceRank'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of sequenceRank') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_sequenceRank.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# -# freight_payloads['operationDurationInSec'].plot(kind='hist', bins=20, edgecolor='black') -# 
plt.title('Histogram of operationDurationInSec') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_operationDurationInSec.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# freight_payloads['weightInKg'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of weightInKg') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_weightInKg.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# freight_payloads['arrivalTimeWindowInSecUpper'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of arrivalTimeWindowInSecUpper') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_arrivalTimeWindowInSecUpper.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# freight_payloads['estimatedTimeOfArrivalInSec'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of estimatedTimeOfArrivalInSec') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_estimatedTimeOfArrivalInSec.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# freight_payloads['arrivalTimeWindowInSecLower'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of arrivalTimeWindowInSecUpper') -# plt.xlabel('Value') -# plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_arrivalTimeWindowInSecLower.png'), dpi=300, bbox_inches='tight') -# plt.clf() -# -# freight_tours['departureTimeInSec'].plot(kind='hist', bins=20, edgecolor='black') -# plt.title('Histogram of departureTimeInSec') -# plt.xlabel('Value') -# 
plt.ylabel('Frequency') -# plt.savefig(os.path.expanduser('~/Workspace/Data/FREIGHT/sfbay/beam_freight/scenarios-23Jan2024/Base/histogram_departureTimeInSec.png'), dpi=300, bbox_inches='tight') -# plt.clf() diff --git a/src/main/python/emissions/emfac_mapping_of_vehicle_population.py b/src/main/python/emissions/emfac_mapping_of_vehicle_population.py deleted file mode 100644 index b724c2fd047..00000000000 --- a/src/main/python/emissions/emfac_mapping_of_vehicle_population.py +++ /dev/null @@ -1,272 +0,0 @@ -from emissions_utils import * -pd.set_option('display.max_columns', 20) - -# HEADER -# ### File Paths ### -# mesozones_lookup_file = os.path.expanduser("~/Workspace/Simulation/sfbay/geo/zonal_id_lookup_final.csv") -# county_data_file = os.path.expanduser("~/Workspace/Simulation/sfbay/geo/sfbay_counties_wgs84.geojson") -# cbg_data_file = os.path.expanduser("~/Workspace/Simulation/sfbay/geo/sfbay_cbgs_wgs84.geojson") -# taz_data_file = os.path.expanduser("~/Workspace/Simulation/sfbay/geo/sfbay_tazs_epsg26910.geojson") -# mesozones_to_county_file = os.path.expanduser("~/Workspace/Simulation/sfbay/geo/mesozones_to_county.csv") -emfac_population_file = os.path.expanduser('~/Workspace/Models/emfac/Default_Statewide_2018_2025_2030_2040_2050_Annual_population_20240612233346.csv') -emfac_emissions_file = os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2018_2025_2030_2040_2050.csv') - -runFT = True # Run Freight Emissions Mapping -runPAX = False # Run Passenger Emissions Mapping -ft_iteration = "2024-01-23" -area = "sfbay" -## -# emfac_year, ft_year, ft_scenario, pax_year, pax_scenario = 2050, 2050, "HOPhighp2", 2045, "LowTech" -# emfac_year, ft_year, ft_scenario, pax_year, pax_scenario = 2018, 2018, "Baseline", 2018, "Baseline" -# emfac_year, ft_year, ft_scenario, pax_year, pax_scenario = 2050, 2050, "Refhighp6", 2045, "LowTech" -emfac_year, ft_year, ft_scenario, pax_year, pax_scenario = 2050, 2050, "HOPhighp6", 2045, "LowTech" -## 
-input_dir = os.path.expanduser(f"~/Workspace/Simulation/{area}/beam-freight/{ft_iteration}") -carriers_file = f"{input_dir}/{str(ft_year)}_{ft_scenario}/carriers--{str(ft_year)}-{ft_scenario}.csv" -payloads_file = f"{input_dir}/{str(ft_year)}_{ft_scenario}/payloads--{str(ft_year)}-{ft_scenario}.csv" -ft_vehicle_types_file = f"{input_dir}/vehicle-tech/ft-vehicletypes--{str(ft_year)}-{ft_scenario}.csv" -pax_vehicle_types_file = f"{input_dir}/vehicle-tech/pax-vehicletypes--{str(pax_year)}-{pax_scenario}.csv" - -# ################## - -# output -ft_filtered_out_emissions_file = f"{input_dir}/vehicle-tech/ft-filtered-out--{str(ft_year)}-{ft_scenario}-TrAP.csv" -ft_vehicle_types_emissions_file = f"{input_dir}/vehicle-tech/ft-vehicletypes--{str(ft_year)}-{ft_scenario}-TrAP.csv" -ft_carriers_emissions_file = f"{input_dir}/{str(ft_year)}_{ft_scenario}/carriers--{str(ft_year)}-{ft_scenario}-TrAP.csv" -ft_emissions_rates_relative_filepath = f"TrAP/{str(ft_year)}-FT-{ft_scenario}" - -pax_filtered_out_emissions_file = f"{input_dir}/vehicle-tech/pax-filtered-out-TrAP.csv" -pax_vehicle_types_emissions_file = f"{input_dir}/vehicle-tech/pax-vehicletypes--{str(pax_year)}-{pax_scenario}-TrAP.csv" -pax_emissions_rates_relative_filepath = f"TrAP/{str(pax_year)}-Pax-{pax_scenario}" - -# combine_csv_files( -# [ -# os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2018.csv'), -# os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2025.csv'), -# os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2030.csv'), -# os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2040.csv'), -# os.path.expanduser('~/Workspace/Models/emfac/imputed_MTC_emission_rate_agg_NH3_added_2050.csv') -# ] -# , emfac_emissions_file) -# - -ft_fuel_mapping_assumptions = { - 'Dsl': 'Diesel', - 'Gas': 'Diesel', - 'NG': 'Diesel', - 'Elec': 'Electricity', - 'Phe': 'PlugInHybridElectricity', - 
'H2fc': 'Electricity' -} -# -pax_fuel_mapping_assumptions = { - 'Dsl': 'Diesel', - 'Gas': 'Gasoline', - 'NG': 'Diesel', - 'Elec': 'Electricity', - 'Phe': 'PlugInHybridElectricity', - 'H2fc': 'Electricity', - 'BioDsl': 'Diesel' -} - - - - -# ######### MAIN ########## - -print(f"Scenario {area}, {str(ft_year)}-{ft_scenario} from {ft_iteration}..") -# all the readings: -ft_payloads = pd.read_csv(payloads_file) -ft_vehicle_types = pd.read_csv(ft_vehicle_types_file) -pax_vehicle_types = pd.read_csv(pax_vehicle_types_file) -ft_carriers = pd.read_csv(carriers_file, dtype=str) - -# ['Dsl', 'Elec', 'Gas', 'Phe', 'NG'] -print("Processing emfac population and rates") -emfac_population = pd.read_csv(emfac_population_file, low_memory=False, dtype=str) -emfac_population['population'] = pd.to_numeric(emfac_population['population'], errors='coerce') -pax_emfac_class_map, ft_emfac_class_map = create_vehicle_class_mapping(emfac_population["vehicle_class"].unique()) - - -emissions_rates = pd.read_csv(emfac_emissions_file, low_memory=False, dtype={ - 'calendar_year': int, - 'season_month': str, - 'sub_area': str, - 'vehicle_class': str, - 'fuel': str, - 'temperature': float, - 'relative_humidity': float, - 'process': str, - 'speed_time': float, - 'pollutant': str, - 'emission_rate': float -}) -filtered_rates = emissions_rates[ - emissions_rates["sub_area"].str.contains(fr"\({re.escape(region_to_emfac_area[area])}\)", case=False, na=False) & - (emissions_rates["calendar_year"] == emfac_year) - ] - - -# ### PASSENGER ### -if runPAX: - print("\nMapping EMFAC for passengers!") - # EMFAC Rates - pax_emissions_rates_for_mapping = prepare_emfac_emissions_for_mapping( - filtered_rates, - pax_emfac_class_map - ) - print(f"EMFAC Passenger Rates => rows: {len(pax_emissions_rates_for_mapping)}, " - f"classes: {len(pax_emissions_rates_for_mapping['emfacClass'].unique())}, " - f"fuel: {len(pax_emissions_rates_for_mapping['emfacFuel'].unique())}") - - # EMFAC Population - 
emfac_passenger_population_for_mapping = prepare_emfac_population_for_mapping( - emfac_population, - emfac_year, - pax_emfac_class_map, - pax_fuel_mapping_assumptions - ) - print(f"EMFAC Passenger Population => rows: {len(emfac_passenger_population_for_mapping)}, " - f"classes: {len(emfac_passenger_population_for_mapping['emfacClass'].unique())}, " - f"fuel: {len(emfac_passenger_population_for_mapping['emfacFuel'].unique())}") - - # Passenger Population - pax_population_for_mapping = prepare_pax_vehicle_population_for_mapping( - pax_vehicle_types, - pax_fuel_mapping_assumptions - ) - print(f"BEAM Passenger Population => rows: {len(pax_population_for_mapping)}, " - f"classes: {len(pax_population_for_mapping['beamClass'].unique())}, " - f"fuel: {len(pax_population_for_mapping['beamFuel'].unique())}") - - print("------------------------------------------------------------------") - print("Distributing passenger vehicle classes from EMFAC across BEAM population...") - updated_passenger_vehicle_types = build_new_pax_vehtypes( - emfac_passenger_population_for_mapping, - pax_population_for_mapping - ) - print(f"Previous vehicle types had {len(pax_population_for_mapping)} types " - f"while the new set has {len(updated_passenger_vehicle_types)} types") - - print("------------------------------------------------------------------") - print("Formatting Passenger EMFAC rates for BEAM") - pax_emfac_formatted, pax_emfac_filtered_out = format_rates_for_beam(pax_emissions_rates_for_mapping) - pax_emfac_filtered_out.to_csv(pax_filtered_out_emissions_file) - print(f"Filtered out passenger processes with all zeros emissions, verify output here => {pax_filtered_out_emissions_file}") - - print("------------------------------------------------------------------") - print("Assigning Passenger emissions rates to new set of vehicle types") - pax_vehicle_types_with_emissions_rates = assign_emissions_rates_to_vehtypes( - pax_emfac_formatted, - updated_passenger_vehicle_types, - input_dir + 
"/vehicle-tech", - pax_emissions_rates_relative_filepath - ) - - # Create a new dataframe with the missing rows - print("------------------------------------------------------------------") - print("Adding back Passenger vehicle types not mapped with EMFAC") - index_population = set(pax_population_for_mapping.index) - index_vehicle_types = set(pax_vehicle_types.index) - missing_rows = index_vehicle_types - index_population - missing_df = pax_vehicle_types.loc[list(missing_rows)] - missing_df["emissionsRatesFile"] = "" - pax_emfac_vehicletypes = pd.concat([pax_vehicle_types_with_emissions_rates[missing_df.columns], missing_df], axis=0) - pax_emfac_vehicletypes.to_csv(pax_vehicle_types_emissions_file, index=False) - - print("Done mapping EMFAC for passengers!") - - -# ************** -# FREIGHT -# ************** -if runFT: - print("\nMapping EMFAC for freight!") - # EMFAC Rates - ft_emissions_rates_for_mapping = prepare_emfac_emissions_for_mapping( - filtered_rates, - ft_emfac_class_map - ) - print(f"EMFAC Freight Rates => rows: {len(ft_emissions_rates_for_mapping)}, " - f"classes: {len(ft_emissions_rates_for_mapping['emfacClass'].unique())}, " - f"fuel: {len(ft_emissions_rates_for_mapping['emfacFuel'].unique())}") - - ft_emfac_pop_for_mapping = prepare_emfac_population_for_mapping( - emfac_population, - emfac_year, - ft_emfac_class_map, - ft_fuel_mapping_assumptions - ) - print(f"EMFAC Freight Population => rows: {len(ft_emfac_pop_for_mapping)}, " - f"classes: {len(ft_emfac_pop_for_mapping['emfacClass'].unique())}, " - f"fuel: {len(ft_emfac_pop_for_mapping['emfacFuel'].unique())}") - - # - ft_population_for_mapping = prepare_ft_vehicle_population_for_mapping( - ft_carriers, - ft_payloads, - ft_vehicle_types, - ft_fuel_mapping_assumptions - ) - print(f"BEAM Freight Population => rows: {len(ft_population_for_mapping)}, " - f"classes: {len(ft_population_for_mapping['beamClass'].unique())}, " - f"fuel: {len(ft_population_for_mapping['beamFuel'].unique())}") - 
unique_vehicles = set(ft_carriers["vehicleId"].unique()) - set(ft_population_for_mapping["vehicleId"].unique()) - if len(unique_vehicles) > 0: - print(f"Failed to map, maybe some vehicles in carriers were not used in payload plans:") - print(unique_vehicles) - - - ### - print("------------------------------------------------------------------") - print("Distributing freight vehicle classes from EMFAC across BEAM population...") - updated_freight_population = distribution_based_vehicle_classes_assignment( - ft_population_for_mapping, - ft_emfac_pop_for_mapping - ) - missing_classes = set(ft_emfac_pop_for_mapping['emfacClass'].unique()) - set(updated_freight_population['emfacClass'].unique()) - missing_fuel = set(ft_emfac_pop_for_mapping['emfacFuel'].unique()) - set(updated_freight_population['emfacFuel'].unique()) - if len(missing_classes) > 0 or len(missing_fuel) > 0: - print(f"Failed to match these classes {missing_classes} and fuel {missing_fuel}") - - - ### - print("------------------------------------------------------------------") - print("Building new set of freight vehicle types") - updated_vehicle_types = build_new_ft_vehtypes(updated_freight_population, ft_vehicle_types) - print(f"Previous vehicle types had {len(ft_vehicle_types)} types while the new set has {len(updated_vehicle_types)} types") - ### - print("------------------------------------------------------------------") - print("Assigning new freight vehicle types to carriers") - updated_carriers = assign_new_ft_vehtypes_to_carriers(ft_carriers, updated_freight_population, ft_carriers_emissions_file) - unique_vehicles = set(ft_carriers["vehicleId"].unique()) - set(updated_carriers["vehicleId"].unique()) - if len(unique_vehicles) > 0: - print(f"Failed to assign vehicle types to these vehicles: {unique_vehicles}") - - - ### - print("------------------------------------------------------------------") - print("Formatting EMFAC freight rates for BEAM") - ft_emfac_formatted, ft_emfac_filtered_out = 
format_rates_for_beam(ft_emissions_rates_for_mapping) - ft_emfac_filtered_out.to_csv(ft_filtered_out_emissions_file) - print(f"Filtered out freight processes with all zeros emissions, verify output here => {ft_filtered_out_emissions_file}") - - ### - print("------------------------------------------------------------------") - print("Assigning freight emissions rates to new set of vehicle types") - ft_vehicle_types_with_emissions_rates = assign_emissions_rates_to_vehtypes( - ft_emfac_formatted, - updated_vehicle_types, - input_dir + "/vehicle-tech", - ft_emissions_rates_relative_filepath - ) - - print("------------------------------------------------------------------") - unique_ft_vehicle_types = set(updated_vehicle_types["vehicleTypeId"].unique()) - set(ft_vehicle_types_with_emissions_rates["vehicleTypeId"].unique()) - if len(unique_ft_vehicle_types) > 0: - print(f"Failed to assign emissions rates to these vehicle types: {unique_ft_vehicle_types}") - - print(f"Writing {ft_vehicle_types_emissions_file}") - updated_vehicle_types.to_csv(ft_vehicle_types_emissions_file, index=False) - -print("End") diff --git a/src/main/python/emissions/emissions_skims_processing.py b/src/main/python/emissions/emissions_skims_processing.py deleted file mode 100644 index d8bbbdea0f8..00000000000 --- a/src/main/python/emissions/emissions_skims_processing.py +++ /dev/null @@ -1,190 +0,0 @@ -from emissions_utils import * -from pathlib import Path -pd.set_option('display.max_columns', 20) - - -# ################ -# #### Header #### -# ################ - -# Input -area = "sfbay" -batch = "2024-01-23" -mode_to_filter = "-TRUCK-" -expansion_factor = 1/0.1 -source_epsg = "EPSG:26910" -selected_pollutants = ['PM2_5', 'NOx', 'CO', 'ROG', 'CO2', 'HC'] -h3_resolution = 8 # Adjust as needed -emfac_vmt_file = os.path.expanduser(f"~/Workspace/Models/emfac/Default_Statewide_2018_2025_2030_2040_2050_Annual_vmt_20240612233346.csv") -run_dir = 
os.path.expanduser(f"~/Workspace/Simulation/{area}/beam-runs/{batch}") -scenario_2018 = "2018_Baseline" -scenario_2050 = "2050_Refhighp6" -skims_2018_file = f"{run_dir}/{scenario_2018}/0.skimsEmissions.csv.gz" -skims_2050_file = f"{run_dir}/{scenario_2050}/0.skimsEmissions.csv.gz" -network_file = f"{run_dir}/network.csv.gz" -plan_dir = os.path.expanduser(f"~/Workspace/Simulation/{area}/beam-freight/{batch}") -types_2018_file = f"{plan_dir}/vehicle-tech/ft-vehicletypes--{scenario_2018.replace('_', '-')}-TrAP.csv" -types_2050_file = f"{plan_dir}/vehicle-tech/ft-vehicletypes--{scenario_2050.replace('_', '-')}-TrAP.csv" -tours_2018_file = f"{plan_dir}/{scenario_2018}/tours--{scenario_2018.replace('_', '-')}.csv" -tours_2050_file = f"{plan_dir}/{scenario_2050}/tours--{scenario_2050.replace('_', '-')}.csv" -carriers_2018_file = f"{plan_dir}/{scenario_2018}/carriers--{scenario_2018.replace('_', '-')}-TrAP.csv" -carriers_2050_file = f"{plan_dir}/{scenario_2050}/carriers--{scenario_2050.replace('_', '-')}-TrAP.csv" - -# Output -plot_dir = f'{run_dir}/_plots' -Path(plot_dir).mkdir(parents=True, exist_ok=True) - -# ################ -# ##### Main ##### -# ################ - -scenario_2018_label = scenario_2018.replace("_", " ") -scenario_2050_label = scenario_2050.replace("_", " ").replace("HOPhighp2", "HAVF") - -# Network -network = load_network(network_file, source_epsg) -network_h3_intersection = generate_h3_intersections(network, h3_resolution, run_dir) -network_h3_intersection.to_csv(f'{run_dir}/network.h3.csv', index=False) - -# Skims -skims_2018 = read_skims_emissions_chunked( - skims_2018_file, - types_2018_file, - mode_to_filter, - network, - expansion_factor, - scenario_2018_label -) -skims_2050 = read_skims_emissions_chunked( - skims_2050_file, - types_2050_file, - mode_to_filter, - network, - expansion_factor, - scenario_2050_label -) -skims = pd.concat([skims_2018, skims_2050]) -print(f"Read {len(skims)} rows of skims") -# fast_df_to_gzip(skims, 
f'{run_dir}/skims_{scenario_2018}_{scenario_2050}.csv.gz') - -# FAMOS Tours -tours_2018 = pd.read_csv(tours_2018_file)[["tourId", 'departureTimeInSec']] -tours_2050 = pd.read_csv(tours_2050_file)[["tourId", 'departureTimeInSec']] -carriers_2018 = pd.read_csv(carriers_2018_file)[["tourId", 'vehicleTypeId']] -carriers_2050 = pd.read_csv(carriers_2050_file)[["tourId", 'vehicleTypeId']] -types_2018 = pd.read_csv(types_2018_file)[["vehicleTypeId", 'vehicleCategory', 'primaryFuelType', 'secondaryFuelType']] -types_2050 = pd.read_csv(types_2050_file)[["vehicleTypeId", 'vehicleCategory', 'primaryFuelType', 'secondaryFuelType']] - -tours_types_2018 = pd.merge(tours_2018, pd.merge(carriers_2018, types_2018, on="vehicleTypeId"), on="tourId") -tours_types_2018["scenario"] = scenario_2018_label -tours_types_2050 = pd.merge(tours_2050, pd.merge(carriers_2050, types_2050, on="vehicleTypeId"), on="tourId") -tours_types_2050["scenario"] = scenario_2050_label -famos_tours = pd.concat([tours_types_2018, tours_types_2050]) - -# FAMOS VMT -# Group by scenario, hour, and fuel_class, sum annualHourlyMVMT -famos_vmt = skims.groupby(['scenario', 'hour', 'beamFuel', 'class'])['vmt'].sum().reset_index().copy() - -# EMFAC VMT -emfac_famos_vmt = create_model_vmt_comparison_chart( - emfac_vmt_file, area, 2050, skims, scenario_2050_label, plot_dir -) - -# Processes -driving_process_activity = skims[ - (skims["process"].isin(["RUNEX", "PMBW", "PMTW", "RUNLOSS"])) & - (skims["vht"] > 0) -].groupby(["scenario", "linkId"])["vmt"].sum().reset_index(name="vmt") -h3_vmt = process_h3_data(network_h3_intersection, driving_process_activity, "vmt") -vmt_column = "Weighted VMT from driving activities" -h3_vmt.rename(columns={"weighted_vmt": vmt_column}, inplace=True) - -parking_process_activity = skims[ - (skims["process"].isin(["STREX", "DIURN", "HOTSOAK", "RUNLOSS", "IDLEX"])) & - (skims["vht"] == 0) -].groupby(["scenario", "linkId"]).size().reset_index(name='count') -h3_count = 
process_h3_data(network_h3_intersection, parking_process_activity, "count") -count_column = "Weighted count of parking activities" -h3_count.rename(columns={"weighted_count": count_column}, inplace=True) - -# Emissions -pm25 = process_h3_emissions(skims, network_h3_intersection, 'PM2_5') -nox = process_h3_emissions(skims, network_h3_intersection, 'NOx') -co = process_h3_emissions(skims, network_h3_intersection, 'CO') -co2 = process_h3_emissions(skims, network_h3_intersection, 'CO2') -# -pm25_column = "PM2_5 in grams per square meter" -pm25[pm25_column] = pm25["PM2_5"] * 1e6 # from metric ton to gram -# -nox_column = "NOx in grams per square meter" -nox[nox_column] = nox["NOx"] * 1e6 # from metric ton to gram -# -co_column = "CO in grams per square meter" -co[co_column] = co["CO"] * 1e6 # from metric ton to gram -# -co2_column = "CO2 in grams per square meter" -co2[co2_column] = co2["CO2"] * 1e6 # from metric ton to gram - -# Delta Emissions -pm25_delta = pm25.pivot(index='h3_cell', columns='scenario', values='PM2_5').reset_index() -pm25_delta = pm25_delta.fillna(0) -pm25_delta["scenario"] = "-".join([scenario_2050_label, scenario_2018_label]) -pm25_delta['Delta_PM2_5'] = pm25_delta[scenario_2050_label] - pm25_delta[scenario_2018_label] -pm25_delta_column = "Delta PM2_5 in grams per square meter" -pm25_delta[pm25_delta_column] = pm25_delta["Delta_PM2_5"] * 1e6 # from metric ton to gram -# -nox_delta = nox.pivot(index='h3_cell', columns='scenario', values='NOx').reset_index() -nox_delta = nox_delta.fillna(0) -nox_delta["scenario"] = "-".join([scenario_2050_label, scenario_2018_label]) -nox_delta['Delta_NOx'] = nox_delta[scenario_2050_label] - nox_delta[scenario_2018_label] -nox_delta_column = "Delta NOx in grams per square meter" -nox_delta[nox_delta_column] = nox_delta["Delta_NOx"] * 1e6 # from metric ton to gram -# -co2_delta = co2.pivot(index='h3_cell', columns='scenario', values='CO2').reset_index() -co2_delta = co2_delta.fillna(0) -co2_delta["scenario"] = 
"-".join([scenario_2050_label, scenario_2018_label]) -co2_delta['Delta_CO2'] = co2_delta[scenario_2050_label] - co2_delta[scenario_2018_label] -co2_delta_column = "Delta CO2 in grams per square meter" -co2_delta[co2_delta_column] = co2_delta["Delta_CO2"] * 1e6 # from metric ton to gram - - - -# ################ -# ### Plotting ### -# ################ -# Figure 1 -plot_hourly_activity(famos_tours, plot_dir, height_size=6) -plot_hourly_vmt(famos_vmt, plot_dir, height_size=6) -# Figure 2 -plot_multi_pie_emfac_famos_vmt(emfac_famos_vmt, plot_dir) -# Figure 3 -plot_h3_heatmap(h3_vmt, vmt_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -plot_h3_heatmap(h3_count, count_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -# Figure 4 -plot_h3_heatmap(pm25, pm25_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -plot_h3_heatmap(nox, nox_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -# plot_h3_heatmap(co, co_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -plot_h3_heatmap(co2, co2_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, in_log_scale=True) -# Figure 5 -plot_hourly_emissions_by_scenario_class_fuel(skims, 'PM2_5', plot_dir, plot_legend=True, height_size=6, font_size=24) -plot_hourly_emissions_by_scenario_class_fuel(skims, 'NOx', plot_dir, plot_legend=True, height_size=6, font_size=24) -#plot_hourly_emissions_by_scenario_class_fuel(skims, 'CO', plot_dir, plot_legend=True, height_size=6, font_size=24) -#plot_hourly_emissions_by_scenario_class_fuel(skims, 'SOx', plot_dir, plot_legend=True, height_size=6, font_size=24) -#plot_hourly_emissions_by_scenario_class_fuel(skims, 'NOx', plot_dir, plot_legend=False, height_size=11, font_size=30) -plot_hourly_emissions_by_scenario_class_fuel(skims, 'CO2', plot_dir, 
plot_legend=True, height_size=6, font_size=24) -# Figure 6 -plot_h3_heatmap(pm25_delta, pm25_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, is_delta=True, remove_outliers=True, in_log_scale=True) -plot_h3_heatmap(nox_delta, nox_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, is_delta=True, remove_outliers=True, in_log_scale=True) -plot_h3_heatmap(co2_delta, co2_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, is_delta=True, remove_outliers=True, in_log_scale=True) -# Figure 7 - -plot_pollution_variability_by_process_vehicle_types(skims, "PM2_5", scenario_2018_label, plot_dir, height_size=6, font_size=24) -plot_pollution_variability_by_process_vehicle_types(skims, "NOx", scenario_2018_label, plot_dir, height_size=6, font_size=24) -#plot_pollution_variability_by_process_vehicle_types(skims, "CO", scenario_2018_label, plot_dir, height_size=6, font_size=24) -#plot_pollution_variability_by_process_vehicle_types(skims, "SOx", scenario_2018_label, plot_dir, height_size=6, font_size=24) -plot_pollution_variability_by_process_vehicle_types(skims, "CO2", scenario_2018_label, plot_dir, height_size=6, font_size=24) - -plot_pollutants_by_process(skims, scenario_2018_label, plot_dir, height_size=6, font_size=24) -plot_pollutants_by_process(skims, scenario_2050_label, plot_dir, height_size=6, font_size=24) - -print("End.") diff --git a/src/main/python/emissions/emissions_utils.py b/src/main/python/emissions/emissions_utils.py deleted file mode 100644 index 04dec08a1c2..00000000000 --- a/src/main/python/emissions/emissions_utils.py +++ /dev/null @@ -1,1761 +0,0 @@ -import pandas as pd -import numpy as np -import pyarrow as pa -import pyarrow.csv as pv -import pyarrow.compute as pc -import matplotlib.pyplot as plt -import matplotlib.colors as mcolors -import matplotlib.colors as colors -import matplotlib.patches as patches -import geopandas as gpd -from matplotlib import colors -from pyproj 
import Transformer -from shapely.geometry import LineString, Polygon -from tqdm import tqdm -from tqdm.auto import tqdm -from matplotlib.colors import LogNorm -import contextily as cx -import seaborn as sns -import gzip -import io -import h3 -import time -import os -import re -import shutil -import math -import warnings - -warnings.filterwarnings("ignore", category=FutureWarning, - message="The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.") - -class_2b3 = 'Class 2b&3 Vocational' -class_46 = 'Class 4-6 Vocational' -class_78_v = 'Class 7&8 Vocational' -class_78_t = 'Class 7&8 Tractor' -class_car = "Car" # these include light and medium duty trucks -class_bike = "Bike" -class_mdp = "MediumDutyPassenger" -not_matched = "Not Matched" - -class_to_category = { - class_2b3: 'Class2b3Vocational', - class_46: 'Class456Vocational', - class_78_v: 'Class78Vocational', - class_78_t: 'Class78Tractor' -} - -fuel_emfac2beam_map = { - 'Dsl': 'diesel', - 'Gas': 'gasoline', - 'NG': 'naturalgas', - 'Elec': 'electricity', - 'Phe': 'pluginhybridelectricity', - 'H2fc': 'hydrogen', - 'BioDsl': 'biodiesel' -} - -fuel_beam2emfac_map = { - 'diesel': 'Dsl', - 'gasoline': 'Gas', - 'naturalgas': 'NG', - 'electricity': 'Elec', - 'pluginhybridelectricity': 'Phe', - 'hydrogen': 'H2fc', - "biodiesel": 'BioDsl' -} - -# Fuel Color Map -fuel_color_map = { - 'Elec': '#4169E1', # Royal Blue - 'H2fc': '#6495ED', # Cornflower Blue - 'Phe': '#87CEEB', # Sky Blue - 'NG': '#B0E0E6', # Pale Blue - 'BioDsl': '#98FB98', # Pale Green - 'Dsl': '#FFD700', # Gold - 'Gas': '#708090' # Slate Gray -} - -process_color_map = { - 'IDLEX': '#fde725', # Light yellow - 'RUNEX': '#7ad151', # Light green - 'PMBW': '#22a884', # Teal - 'PMTW': '#2a788e', # Blue-green - 'STREX': '#8e0152', # Dark magenta - 'RUNLOSS': '#4b0082', # Indigo - 'HOTSOAK': '#414487', # Purple-blue - 'DIURN': '#440154', # Dark purple -} - -pollutant_columns = 
{ - 'CH4': 'rate_ch4_gram_float', - 'CO': 'rate_co_gram_float', - 'CO2': 'rate_co2_gram_float', - 'HC': 'rate_hc_gram_float', - 'NH3': 'rate_nh3_gram_float', - 'NOx': 'rate_nox_gram_float', - 'PM': 'rate_pm_gram_float', - 'PM10': 'rate_pm10_gram_float', - 'PM2_5': 'rate_pm2_5_gram_float', - 'ROG': 'rate_rog_gram_float', - 'SOx': 'rate_sox_gram_float', - 'TOG': 'rate_tog_gram_float' -} - -emissions_processes = ["RUNEX", "IDLEX", "STREX", "DIURN", "HOTSOAK", "RUNLOSS", "PMTW", "PMBW"] - -region_to_emfac_area = { - "sfbay": "SF" -} - -skims_schema = pa.schema([ - ('hour', pa.int64()), - ('linkId', pa.int64()), - ('tazId', pa.string()), - ('vehicleTypeId', pa.string()), - ('emissionsProcess', pa.string()), - ('speedInMps', pa.float64()), - ('energyInJoule', pa.float64()), - ('observations', pa.int64()), - ('iterations', pa.int64()), - ('CH4', pa.float64()), - ('CO', pa.float64()), - ('CO2', pa.float64()), - ('HC', pa.float64()), - ('NH3', pa.float64()), - ('NOx', pa.float64()), - ('PM', pa.float64()), - ('PM10', pa.float64()), - ('PM2_5', pa.float64()), - ('ROG', pa.float64()), - ('SOx', pa.float64()), - ('TOG', pa.float64()) -]) - - -def darken_color(color, factor=0.8): - rgb = mcolors.to_rgb(color) - return tuple(max(0, c * factor) for c in rgb) - - -def sanitize_name(filename): - # Start with the original filename - sanitized = filename - - # Replace other common superscripts if needed - superscript_map = {'¹': '1', '²': '2', '³': '3', '⁴': '4', '⁵': '5', '⁶': '6', '⁷': '7', '⁸': '8', '⁹': '9'} - for sup, normal in superscript_map.items(): - sanitized = sanitized.replace(sup, normal) - - # Replace parentheses with underscores - sanitized = sanitized.replace('(', '_').replace(')', '_') - - # Replace forward slashes and backslashes with dashes - sanitized = sanitized.replace('/', '-').replace('\\', '-') - - # Replace spaces with underscores - sanitized = sanitized.replace(' ', '_') - - # Remove or replace any other non-alphanumeric characters (except dashes and 
underscores) - sanitized = re.sub(r'[^\w\-_]', '', sanitized) - - # Replace any sequence of dashes or underscores with a single underscore - sanitized = re.sub(r'[_-]+', '_', sanitized) - - # Remove leading and trailing underscores - sanitized = sanitized.strip('_') - - return sanitized - - -def get_vehicle_class_from_freight(vehicle_type): - if 'md' in vehicle_type: - return class_46 - elif 'hdt' in vehicle_type: - return class_78_v - elif 'hdv' in vehicle_type: - return class_78_t - else: - return None - - -def prepare_emfac_emissions_for_mapping(emissions_rates, emfac_class_map): - data = emissions_rates.copy() - data = data.fillna({'speed_time': ''}) # Replace NaN with empty string - data = data.reset_index(drop=True) # Reset index - grouped_data = data.groupby( - ['sub_area', "vehicle_class", 'fuel', 'process', "speed_time", "pollutant"] - )['emission_rate'].mean().reset_index() - # Extract county and area from sub_area - grouped_data['beamClass'] = grouped_data['vehicle_class'].map(emfac_class_map) - grouped_data.dropna(subset=['beamClass'], inplace=True) - grouped_data[['county', 'area']] = grouped_data['sub_area'].str.extract(r'^([^()]+)\s*\(([^)]+)\)') - # Clean up the extracted data - grouped_data['county'] = grouped_data['county'].str.strip().str.lower() - grouped_data['area'] = grouped_data['area'].str.strip() - grouped_data.drop(['sub_area'], axis=1, inplace=True) - # Create emfacId - grouped_data['emfacId'] = grouped_data.apply( - lambda row: sanitize_name(f"{row['vehicle_class']}-{row['fuel']}"), - axis=1 - ) - grouped_data.rename(columns={'vehicle_class': 'emfacClass', 'fuel': 'emfacFuel'}, inplace=True) - return grouped_data - - -def prepare_emfac_population_for_mapping(emfac_population, year, emfac_class_map, fuel_assumption_mapping, - ignore_model_year=True): - df = emfac_population[(emfac_population["calendar_year"] == str(year))].drop(["calendar_year"], axis=1) - if ignore_model_year: - # Group by vehicle_class and fuel, aggregating population 
- df = df.groupby(['vehicle_class', 'fuel'], as_index=False)['population'].sum() - - df['beamClass'] = df['vehicle_class'].map(emfac_class_map) - df.dropna(subset=['beamClass'], inplace=True) - - # Validation checks - if len(df["vehicle_class"].unique()) != len(emfac_class_map): - print("Warning: Mismatch in vehicle class mapping") - if not df['fuel'].isin(fuel_emfac2beam_map.keys()).all(): - print("Warning: Missing fuel type from dictionary") - - df["mappedFuel"] = df['fuel'].map(fuel_assumption_mapping) - df['emfacId'] = df.apply( - lambda row: sanitize_name(f"{row['vehicle_class']}-{row['fuel']}"), - axis=1 - ) - df.rename(columns={'vehicle_class': 'emfacClass', 'fuel': 'emfacFuel'}, inplace=True) - return df - - -def unpacking_ft_vehicle_population_mesozones(carriers, mesozones_to_county_file, mesozones_lookup_file): - import pygris - # ### Mapping counties with Mesozones ### - if not os.path.exists(mesozones_to_county_file): - county_data = pygris.counties(state='06', year=2018, cb=True, cache=True) - cbg_data = pygris.block_groups(state='06', year=2018, cb=True, cache=True) - county_data_clipped = county_data[['COUNTYFP', 'NAME']] - cbg_data_clipped = cbg_data[['GEOID', 'COUNTYFP']] - cbg_to_county = pd.merge(cbg_data_clipped, county_data_clipped, on="COUNTYFP", how='left') - mesozones_lookup = pd.read_csv(mesozones_lookup_file, dtype=str) - mesozones_lookup_clipped = mesozones_lookup[['MESOZONE', 'GEOID']] - mesozones_to_county = pd.merge(mesozones_lookup_clipped, cbg_to_county, on='GEOID', how='left') - mesozones_to_county.to_csv(mesozones_to_county_file, index=False) - else: - mesozones_to_county = pd.read_csv(mesozones_to_county_file, dtype=str) - - # TODO For future improvement find a way to map outside study area mesozones. 
It's a significant effort because - # TODO need to also restructure EMFAC in such a way vehicle population from outside study area well represented - if not mesozones_to_county[mesozones_to_county["NAME"].isna()].empty: - print("Mesozones outside study area do not have a proper GEOID and were not mapped.") - mesozones_to_county_studyarea = mesozones_to_county[mesozones_to_county["NAME"].notna()][["MESOZONE", "NAME"]] - - # ### Mapping freight carriers with counties, payload and vehicle types ### - carriers_by_zone = pd.merge(carriers, mesozones_to_county_studyarea, left_on='warehouseZone', - right_on='MESOZONE', how='left') - if not carriers_by_zone[carriers_by_zone['NAME'].isna()].empty: - print( - "Something went wrong with the mapping of freight carrier zones with mesozones. Here the non mapped ones:") - print(carriers_by_zone[carriers_by_zone['NAME'].isna()]) - carriers_by_zone = carriers_by_zone[['tourId', 'vehicleId', 'vehicleTypeId', 'NAME']].rename( - columns={'NAME': 'zone'}) - - return carriers_by_zone - - -def prepare_pax_vehicle_population_for_mapping(vehicletypes, fuel_assumption_mapping): - # Apply the parsing function to create a new DataFrame with the parsed values - data = vehicletypes.copy() - data = data[ - (data['vehicleCategory'].isin([class_car, class_bike])) | - ((data['vehicleCategory'] == class_mdp) & (data['vehicleTypeId'].str.lower().str.contains('bus'))) - ] - - # parsed_probs = data['sampleProbabilityString'].apply(parse_probability_string).apply(pd.Series) - # Merge the new columns with the original DataFrame - # data = pd.concat([data, parsed_probs], axis=1) - # Fill NaN values with 0 for the new probability columns - # prob_columns = [col for col in data.columns if col.startswith('ridehail_prob') or col.startswith('private_prob')] - # data[prob_columns] = data[prob_columns].fillna(0) - # Load and process vehicle types - data['beamClass'] = data['vehicleCategory'] - data['beamFuel'] = np.where( - (data['primaryFuelType'] == 
fuel_emfac2beam_map["Elec"]) & - data['secondaryFuelType'].notna(), - fuel_emfac2beam_map['Phe'], - data['primaryFuelType'] - ) - - def handle_missing_fuel(x): - try: - return fuel_assumption_mapping[fuel_beam2emfac_map[x.lower()]] - except KeyError: - warnings.warn(f"Fuel type '{x}' not found in mapping. Using original value.") - return None - - data['mappedFuel'] = data['beamFuel'].map(handle_missing_fuel) - return data - - -def prepare_ft_vehicle_population_for_mapping(carriers, payloads_raw, ft_vehicletypes, - fuel_assumption_mapping): - carriers_formatted = carriers[['tourId', 'vehicleId', 'vehicleTypeId']] - payloads = payloads_raw[['payloadId', 'tourId', 'payloadType']].copy() - ft_vehicletypes = ft_vehicletypes[['vehicleTypeId', 'primaryFuelType', 'secondaryFuelType']].copy() - - ft_vehicletypes['beamClass'] = ft_vehicletypes['vehicleTypeId'].apply(get_vehicle_class_from_freight) - - # Summarize data - payloads.loc[:, 'payloadType'] = payloads['payloadType'].astype(str) - payloads_summary = payloads.groupby(['tourId'])['payloadType'].agg('|'.join).reset_index() - - # Merge payload summary with carriers - payloads_merged = pd.merge(payloads_summary, carriers_formatted, on='tourId', how='left') - - # Load and process vehicle types - ft_vehicletypes['beamFuel'] = np.where( - (ft_vehicletypes['primaryFuelType'] == fuel_emfac2beam_map["Elec"]) & - ft_vehicletypes['secondaryFuelType'].notna(), - fuel_emfac2beam_map['Phe'], - ft_vehicletypes['primaryFuelType'] - ) - - def handle_missing_fuel(x): - try: - return fuel_assumption_mapping[fuel_beam2emfac_map[x.lower()]] - except KeyError: - warnings.warn(f"Fuel type '{x}' not found in mapping. 
Using original value.") - return x - - ft_vehicletypes['mappedFuel'] = ft_vehicletypes['beamFuel'].map(handle_missing_fuel) - - # Merge payloads with vehicle types - payloads_vehtypes = pd.merge( - payloads_merged, - ft_vehicletypes[['vehicleTypeId', 'beamClass', 'beamFuel', 'mappedFuel']], - on='vehicleTypeId', - how='left' - ) - - # Check for missing fuel types - if payloads_vehtypes['beamFuel'].isna().any(): - print("Warning: Missing fuel types for some vehicle IDs") - print(payloads_vehtypes[payloads_vehtypes['beamFuel'].isna()]) - - # Remove duplicates and return - return payloads_vehtypes.drop_duplicates('vehicleId', keep='first') - - -def distribution_based_vehicle_classes_assignment(ft_df, emfac_df): - # Remove 'Class 2b&3 Vocational' from EMFAC data - emfac_df = emfac_df[emfac_df['beamClass'] != class_2b3] - - def sample_emfac(the_class, ft_mapped_fuel): - emfac_grouped = emfac_df[ - (emfac_df['beamClass'] == the_class) & (emfac_df['mappedFuel'] == ft_mapped_fuel)] - if emfac_grouped.empty: - print(f"failed to match this fuel: {ft_mapped_fuel}") - emfac_grouped = emfac_df[emfac_df['beamClass'] == the_class] - return emfac_grouped.sample(n=1, weights='population')['emfacId'].iloc[0] - - total_emfac = emfac_df["population"].sum() - class_46_share = emfac_df[emfac_df['beamClass'] == class_46]["population"].sum() / total_emfac - class_78_v_share = emfac_df[emfac_df['beamClass'] == class_78_v]["population"].sum() / total_emfac - total_freight = len(ft_df) - class_46_target = int(class_46_share * total_freight) - class_78_v_target = int(class_78_v_share * total_freight) - - class_46_count = 0 - class_78_v_count = 0 - - def sample_emfac_class(row): - nonlocal class_46_count, class_78_v_count - - if class_46_count < class_46_target: - if row['beamClass'] == class_46: - class_46_count += 1 - return sample_emfac(class_46, row['mappedFuel']) - - if row['beamClass'] == class_78_v: - class_46_count += 1 - return sample_emfac(class_46, row['mappedFuel']) - - if 
row['beamClass'] == class_78_t: - class_46_count += 1 - return sample_emfac(class_46, row['mappedFuel']) - - if class_78_v_count < class_78_v_target: - if row['beamClass'] == class_78_v: - class_78_v_count += 1 - return sample_emfac(class_78_v, row['mappedFuel']) - - if row['beamClass'] == class_78_t: - class_78_v_count += 1 - return sample_emfac(class_78_v, row['mappedFuel']) - - return sample_emfac(class_78_t, row['mappedFuel']) - - ft_df['beamClassBis'] = ft_df['beamClass'].map({class_46: 1, class_78_v: 2, class_78_t: 3}) - ft_df['emfacId'] = ft_df.sort_values('beamClassBis').apply(sample_emfac_class, axis=1) - ft_df["oldVehicleTypeId"] = ft_df["vehicleTypeId"] - ft_df['vehicleTypeId'] = ft_df.apply( - lambda row: f"EMFAC-{row['emfacId']}--TRUCK-{'-'.join(row['oldVehicleTypeId'].split('-')[:-1])}", - axis=1 - ) - merged = pd.merge(ft_df, emfac_df.drop(["beamClass", "mappedFuel"], axis=1), on="emfacId", how="left").drop( - ["beamClassBis"], axis=1) - return merged - - -def pivot_rates_for_beam(df_raw): - unique_speed_time = df_raw.speed_time.unique() - has_non_empty_speed_time = any(len(str(x)) > 0 for x in unique_speed_time) and not pd.isnull( - unique_speed_time).all() - index_ = ["emfacId", 'county', 'process'] - if has_non_empty_speed_time: - index_.append("speed_time") - pivot_df = df_raw.pivot_table(index=index_, columns='pollutant', values='emission_rate', aggfunc='first', - fill_value=0).reset_index() - pivot_df = pivot_df.rename(columns=pollutant_columns) - # Add missing columns with default values - for col in pollutant_columns.values(): - if col not in pivot_df.columns: - pivot_df[col] = 0.0 - pivot_df.insert(0, 'speed_mph_float_bins', "") - pivot_df.insert(1, 'time_minutes_float_bins', "") - return pivot_df - - -def numerical_column_to_binned_and_pivot(df_raw, numerical_colname, binned_colname, edge_values): - pivot_df = pivot_rates_for_beam(df_raw).sort_values(by='speed_time', ascending=True) - df_raw_last_row = pivot_df.iloc[-1].copy() - 
df_raw_last_row['speed_time'] = edge_values[1] - pivot_df = pd.concat([pivot_df, pd.DataFrame([df_raw_last_row])], ignore_index=True) - col_sorted = sorted(pivot_df[numerical_colname].unique()) - col_bins = [edge_values[0]] + col_sorted - col_labels = [f"[{col_bins[i]}, {col_bins[i + 1]})" for i in range(len(col_bins) - 1)] - pivot_df[binned_colname] = pd.cut(pivot_df[numerical_colname], bins=col_bins, labels=col_labels, right=True) - return pivot_df - - -def process_rates_group(df, row): - mask = ((df["county"] == row["county"]) & (df["emfacId"] == row["emfacId"])) - df_subset = df[mask] - df_output_list = [] - for process in emissions_processes: - df_temp = df_subset[df_subset['process'] == process] - if not df_temp.empty: - if process in ['RUNEX', 'PMBW']: - df_temp = numerical_column_to_binned_and_pivot(df_temp, 'speed_time', 'speed_mph_float_bins', - [0.0, 200.0]) - elif process == 'STREX': - df_temp = numerical_column_to_binned_and_pivot(df_temp, 'speed_time', 'time_minutes_float_bins', - [0.0, 3600.0]) - else: - df_temp = pivot_rates_for_beam(df_temp) - df_output_list.append(df_temp) - - return pd.concat(df_output_list, ignore_index=True) - - -def format_rates_for_beam(emissions_rates): - from joblib import Parallel, delayed - - # Assuming emissions_rates is already loaded into a DataFrame `df` - group_by_cols = ["county", "emfacId"] - df_unique = emissions_rates[group_by_cols].drop_duplicates().reset_index(drop=True) - - # Parallel processing - df_output_list = Parallel(n_jobs=-1)( - delayed(process_rates_group)(emissions_rates, row) for index, row in df_unique.iterrows() - ) - - # Formatting for merge - df_output = pd.concat(df_output_list, ignore_index=True).drop(["speed_time"], axis=1) - - # Filter out rows where all emission columns are zero - emission_columns = [col for col in df_output.columns if col.startswith('rate_') and col.endswith('_gram_float')] - filtered_out = df_output[(df_output[emission_columns] == 0).all(axis=1)] - df_output = 
df_output[~(df_output[emission_columns] == 0).all(axis=1)] - - # Reorder columns to ensure 'county' is at the front - columns = df_output.columns.tolist() - columns = ['county'] + [col for col in columns if col != 'county'] - df_output = df_output[columns] - return df_output, filtered_out - - -def process_single_vehicle_type(veh_type, emissions_rates, rates_prefix_filepath): - veh_type_id = veh_type['vehicleTypeId'] - emfac_id = veh_type['emfacId'] - - # Filter taz_emissions_rates for the current vehicle type - veh_emissions = emissions_rates[emissions_rates['emfacId'] == emfac_id].copy() - - if not veh_emissions.empty: - # Remove the emfacId column as it's no longer needed - veh_emissions = veh_emissions.drop('emfacId', axis=1) - - # Generate the file name - file_path = f"{rates_prefix_filepath}{veh_type_id}.csv" - - print("Writing " + file_path) - # Save the emissions rates to a CSV file - veh_emissions.to_csv(file_path, index=False) - - return veh_type_id - else: - print(f"Warning: No emissions data found for vehicle type {veh_type_id}") - return veh_type_id - - -def assign_emissions_rates_to_vehtypes(emissions_rates, vehicle_types, output_dir, emissions_rates_relative_filepath): - from joblib import Parallel, delayed - emissions_rates_dir = os.path.abspath(os.path.join(output_dir, emissions_rates_relative_filepath)) - if ensure_empty_directory(emissions_rates_dir): - print(f"Ready to write new data to the directory {emissions_rates_dir}") - else: - print(f"Failed to prepare the directory {emissions_rates_dir}. 
Please check permissions and try again.") - os.makedirs(emissions_rates_dir, exist_ok=True) - - # Use parallel processing with error handling and chunking - chunk_size = 100 # Adjust this value based on your data size and available memory - results = [] - - for i in range(0, len(vehicle_types), chunk_size): - chunk = vehicle_types.iloc[i:i + chunk_size] - - chunk_results = Parallel(n_jobs=-1, timeout=600)( # 10-minute timeout - delayed(process_single_vehicle_type)( - veh_type, - emissions_rates, - f"{output_dir}/{emissions_rates_relative_filepath}/TrAP--" - ) for _, veh_type in chunk.iterrows() - ) - - results.extend(chunk_results) - - # Clear some memory - del chunk_results - - # Update the vehicle_types DataFrame with the new emissionsRatesFile information - for veh_type_id in results: - if veh_type_id: - relative_rates_filepath = f"{emissions_rates_relative_filepath}/TrAP--{veh_type_id}.csv" - vehicle_types.loc[ - vehicle_types['vehicleTypeId'] == veh_type_id, 'emissionsRatesFile'] = relative_rates_filepath - - return vehicle_types - - -def build_new_pax_vehtypes(pax_emfac_population_for_mapping, pax_population_for_mapping): - df_merged = pd.merge(pax_population_for_mapping, pax_emfac_population_for_mapping, - on=['beamClass', 'mappedFuel'], how='left') - df_merged_car = df_merged[df_merged["beamClass"] == class_car].copy() - df_merged_others = df_merged[df_merged["beamClass"] != class_car].copy() - - df_merged_car['population_share'] = df_merged_car['population'] / df_merged_car['population'].sum() - df_merged_car['updated_sampleProbabilityString'] = df_merged_car.apply( - lambda row: update_sample_probability_string(row), - axis=1 - ) - df_merged_car['updated_sampleProbabilityWithinCategory'] = df_merged_car.apply( - lambda row: row['sampleProbabilityWithinCategory'] * row['population_share'], - axis=1 - ) - # Update vehicleTypeId only for eligible rows - df_merged_car['updated_vehicleTypeId'] = df_merged_car.apply( - lambda row: 
f"EMFAC-{row['emfacId']}--ADOPT-{row['vehicleTypeId']}", - axis=1 - ) - # Update the original dataframe with new probabilities and vehicleTypeId - df_merged_car['sampleProbabilityString'] = df_merged_car['updated_sampleProbabilityString'] - df_merged_car['sampleProbabilityWithinCategory'] = df_merged_car['updated_sampleProbabilityWithinCategory'] - df_merged_car['vehicleTypeId'] = df_merged_car['updated_vehicleTypeId'] - updated_pax_vehicle_types = pd.concat([df_merged_car[df_merged_others.columns], df_merged_others], axis=0) - - return updated_pax_vehicle_types - - -def build_new_ft_vehtypes(updated_ft_population, ft_vehicle_types): - # Create a copy of the original vehicleTypeId and set up a lookup dictionary - ft_vehicle_types_dict = ft_vehicle_types.set_index("vehicleTypeId").to_dict('index') - - # Remove duplicates based on vehicleTypeId, keeping the first occurrence - unique_vehicle_types = updated_ft_population.drop_duplicates(subset='vehicleTypeId', keep='first') - - def process_row(row): - new_row = ft_vehicle_types_dict[row["oldVehicleTypeId"]].copy() - new_row["vehicleTypeId"] = row["vehicleTypeId"] - new_row['vehicleClass'] = row["beamClass"] - new_row['vehicleCategory'] = class_to_category[row['beamClass']] - new_row["emfacId"] = row['emfacId'] - return new_row - - # Apply process_row to the unique vehicle types - result_df = pd.DataFrame(unique_vehicle_types.apply(process_row, axis=1).tolist()) - - # Define the desired column order with 'vehicleTypeId' at the front - columns_order = ['vehicleTypeId'] + [ - col for col in result_df.columns if col not in {'vehicleTypeId'} - ] - - # Reorder the columns - result_df = result_df[columns_order] - - return result_df - - -def assign_new_ft_vehtypes_to_carriers(carrier_df, updated_ft_population, carriers_emissions_file): - vehicle_id_to_type_mapping = dict(zip(updated_ft_population['vehicleId'], - updated_ft_population['vehicleTypeId'])) - - def update_vehicle_type(row): - return 
vehicle_id_to_type_mapping.get(row['vehicleId']) - - carrier_df_new = carrier_df.copy() - carrier_df_new['vehicleTypeId'] = carrier_df.apply(update_vehicle_type, axis=1) - carrier_df_new.dropna(subset=['vehicleTypeId'], inplace=True) - print(f"Writing {carriers_emissions_file}") - carrier_df_new.to_csv(carriers_emissions_file, index=False) - return carrier_df_new - - -def combine_csv_files(input_files, output_file): - # Read and combine CSV files vertically - combined_df = pd.concat([pd.read_csv(f) for f in input_files], ignore_index=True) - - # Write the combined dataframe to a new CSV file - combined_df.to_csv(output_file, index=False) - - print(f"Combined CSV file has been created: {output_file}") - return combined_df # Return the dataframe for further processing if needed - - -def ensure_empty_directory(directory_path): - """ - Ensure an empty directory exists at the given path. - If it exists, delete it and its contents, then recreate it. - If it doesn't exist, create it. - """ - directory_path = os.path.abspath(directory_path) - - if os.path.exists(directory_path): - try: - shutil.rmtree(directory_path) - print(f"Existing directory removed: {directory_path}") - except Exception as e: - print(f"Error removing directory {directory_path}: {e}") - return False - - try: - os.makedirs(directory_path) - print(f"Directory created: {directory_path}") - return True - except Exception as e: - print(f"Error creating directory {directory_path}: {e}") - return False - - -def calculate_truck_ownership_probability(income): - """ - Calculate the probability of truck ownership based on household income. 
- - :param income: Household income in thousands of dollars per year - :return: Probability of truck ownership (0 to 1) - """ - k = 0.1 # Steepness parameter - x0 = 80 # Income at which probability is 0.5 - - # Calculate probability using logistic function - probability = 1 / (1 + math.exp(-k * (income - x0))) - - return probability - - -def parse_probability_string(prob_string): - result = {} - parts = prob_string.split(';') - for part in parts: - try: - key, value = part.strip().split(':') - if 'ridehail' in key: - result['ridehail_prob_all'] = float(value) - elif 'income' in key: - income_range = key.split('|')[1].strip() - result[f'private_prob_{income_range}'] = float(value) - except ValueError: - # If the part doesn't have the expected structure, skip it - continue - return result - - -def update_sample_probability_string(row): - groups = row['sampleProbabilityString'].replace(' ', '').lower().split(';') - updated_groups = [] - - for group in groups: - if '|' not in group: - updated_groups.append(group) - continue - - group_key, values = group.split('|') - key_probs = [kp.split(':') for kp in values.split(',')] - - if group_key == 'ridehail': - # Update 'all' probability with population_share - key_probs = [(k, str(float(p) * row['population_share']) if k == 'all' else p) for k, p in key_probs] - elif group_key == 'income': - # Update income category probability with population_share - key_probs = [(k, str(float(p) * row['population_share'])) for k, p in key_probs] - - updated_values = ','.join([f"{k}:{p}" for k, p in key_probs]) - updated_groups.append(f"{group_key}|{updated_values}") - - return '; '.join(updated_groups) - - -def create_vehicle_class_mapping(vehicle_list): - mapping = {} - - for vehicle in vehicle_list: - if 'Utility' in vehicle or 'Public' in vehicle: - mapping[vehicle] = not_matched - elif 'Port' in vehicle or 'POLA' in vehicle or 'POAK' in vehicle: - mapping[vehicle] = not_matched - elif 'SWCV' in vehicle or 'PTO' in vehicle or 'T6TS' in 
vehicle: - mapping[vehicle] = not_matched - - elif vehicle in ['LDA', 'LDT1', 'LDT2', 'MDV']: - mapping[vehicle] = class_car - elif vehicle in ['MCY']: - mapping[vehicle] = class_bike - elif vehicle in ['UBUS']: - mapping[vehicle] = class_mdp - elif 'LHD' in vehicle: - mapping[vehicle] = class_2b3 - - elif 'Class 4' in vehicle or 'Class 5' in vehicle or 'Class 6' in vehicle: - mapping[vehicle] = class_46 - - elif 'Class 7' in vehicle or 'Class 8' in vehicle: - if 'Tractor' in vehicle or 'CAIRP' in vehicle: - mapping[vehicle] = class_78_t - else: - mapping[vehicle] = class_78_v - elif "T7IS" in vehicle: - mapping[vehicle] = class_78_t - - else: - mapping[vehicle] = not_matched - - from collections import defaultdict - class_groups = defaultdict(list) - for vehicle, vehicle_class in mapping.items(): - class_groups[vehicle_class].append(vehicle) - for vehicle_class, vehicles in class_groups.items(): - print(f"Category: {vehicle_class}") - for vehicle in vehicles: - print(f" - {vehicle}") - - ft_emfac_class_map = {emfac: beam for emfac, beam in mapping.items() if - beam in [class_46, class_78_v, class_78_t]} - pax_emfac_class_map = {emfac: beam for emfac, beam in mapping.items() if - beam in [class_car, class_bike, class_mdp]} - - return pax_emfac_class_map, ft_emfac_class_map - - -def load_network(network_file, source_epsg): - # Read and process network file - network = pd.read_csv(network_file) - transformer = Transformer.from_crs(source_epsg, "EPSG:4326", always_xy=True) - - # Vectorized coordinate conversion - network[['fromLocationX', 'fromLocationY']] = network.apply( - lambda row: pd.Series(transformer.transform(row['fromLocationX'], row['fromLocationY'])), - axis=1, result_type='expand' - ) - network[['toLocationX', 'toLocationY']] = network.apply( - lambda row: pd.Series(transformer.transform(row['toLocationX'], row['toLocationY'])), - axis=1, result_type='expand' - ) - - return network[['linkId', 'linkLength', 'fromLocationX', 'fromLocationY', 'toLocationX', 
'toLocationY']] - - -def read_skims_emissions(skims_file, vehicleTypes_file, vehicleTypeId_filter, network, expansion_factor, scenario_name): - start_time = time.time() - # Read and filter the skims file using PyArrow - table = pv.read_csv(skims_file, - read_options=pv.ReadOptions(use_threads=True), - parse_options=pv.ParseOptions(delimiter=','), - convert_options=pv.ConvertOptions(column_types=skims_schema)) - - filtered_table = table.filter(pc.match_substring(table['vehicleTypeId'], pattern=vehicleTypeId_filter)) - - # Perform calculations in PyArrow - annual_expansion = filtered_table['observations'] * expansion_factor * 365 - - for pollutant in pollutant_columns.keys(): - filtered_table = filtered_table.append_column( - f'{pollutant}_annual', - pc.multiply(pc.divide(filtered_table[pollutant], pc.cast(pa.scalar(1e6), pa.float64())), annual_expansion) - ) - - filtered_table = filtered_table.append_column( - 'annualHourlyEnergyGwh', - pc.multiply(pc.divide(filtered_table['energyInJoule'], pc.cast(pa.scalar(3.6e12), pa.float64())), - annual_expansion) - ) - - filtered_table = filtered_table.append_column( - 'annualHourlySpeedMph', - pc.divide(filtered_table['speedInMps'], pc.cast(pa.scalar(2.237), pa.float64())) - ) - - # Convert to pandas - df = filtered_table.to_pandas() - - # Process vehicleTypes file - vehicleTypes = pd.read_csv(vehicleTypes_file) - vehicleTypes['fuel'] = vehicleTypes['emfacId'].str.split('-').str[-1] - vehicleTypes['class'] = vehicleTypes['vehicleClass'].str.replace('Vocational|Tractor', '', regex=True).str.strip() - - # Merge with vehicleTypes and network - df = (df.merge(vehicleTypes[['vehicleTypeId', 'class', 'fuel']], on='vehicleTypeId', how='left') - .merge(network[['linkId', 'linkLength']], on='linkId', how='left')) - - # Calculate annualHourlyMVMT - df['annualHourlyMVMT'] = (df['linkLength'] * 6.21371192e-13) * annual_expansion - - # Rename column - df.rename(columns={'emissionsProcess': 'process'}, inplace=True) - - # Melt the 
dataframe - id_vars = ['hour', 'linkId', 'tazId', 'class', 'fuel', 'process', 'annualHourlySpeedMph', 'annualHourlyEnergyGwh', - 'annualHourlyMVMT'] - value_vars = [f'{pollutant}_annual' for pollutant in pollutant_columns.keys()] - melted = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='pollutant', value_name='rate') - melted['pollutant'] = melted['pollutant'].str.replace('_annual', '') - melted['scenario'] = scenario_name - - end_time = time.time() - print(f"Time taken to read the file: {end_time - start_time:.2f} seconds to read file {skims_file}") - - return melted - - -def read_skims_emissions_chunked(skims_file, vehicleTypes_file, vehicleTypeId_filter, network, expansion_factor, - scenario_name, chunk_size=1000000): - start_time = time.time() - - # Process vehicleTypes file - vehicleTypes = pd.read_csv(vehicleTypes_file) - vehicleTypes['emfacFuel'] = vehicleTypes['emfacId'].str.split('-').str[-1] - vehicleTypes['class'] = vehicleTypes['vehicleCategory'].str.replace('Vocational|Tractor', '', regex=True).str.strip() - vehicleTypes['beamFuel'] = np.where( - (vehicleTypes['primaryFuelType'].str.lower() == fuel_emfac2beam_map["Elec"]) & vehicleTypes[ - 'secondaryFuelType'].notna(), - 'Phe', - vehicleTypes['primaryFuelType'].str.lower().map(fuel_beam2emfac_map) - ) - - # Initialize an empty list to store the processed chunks - result_chunks = [] - - # Set up the CSV reader with chunking - csv_reader = pv.open_csv( - skims_file, - read_options=pv.ReadOptions(block_size=chunk_size, use_threads=True), - parse_options=pv.ParseOptions(delimiter=','), - convert_options=pv.ConvertOptions(column_types=skims_schema) - ) - - # Get total file size for progress bar - total_size = os.path.getsize(skims_file) - - # Initialize progress bar - pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc="Processing chunks", - position=0, leave=True, mininterval=1.0, maxinterval=10.0, miniters=1) - - # Process the skims file in chunks - for chunk in csv_reader: - 
chunk_size = chunk.nbytes - - # Filter the chunk - mask = pc.match_substring(chunk['vehicleTypeId'], pattern=vehicleTypeId_filter) - filtered_chunk = chunk.filter(mask) - # del chunk # Explicitly remove reference to the original chunk - - # Perform calculations in PyArrow - observations_expansion = pc.multiply( - filtered_chunk['observations'], pc.cast(pa.scalar(expansion_factor), pa.float64()) - ) - - new_columns = [] - new_fields = [] - for pollutant in pollutant_columns.keys(): - new_fields.append(pa.field(f'scaled_{pollutant}', pa.float64(), True)) - new_columns.append(pc.multiply( - pc.divide( - filtered_chunk[pollutant], pc.cast(pa.scalar(1e6), pa.float64()) - ), - observations_expansion - )) - - new_fields.append(pa.field('kwh', pa.float64(), True)) - new_columns.append( - pc.multiply( - pc.divide( - filtered_chunk['energyInJoule'], pc.cast(pa.scalar(3.6e6), pa.float64()) - ), - observations_expansion - ) - ) - - new_fields.append(pa.field('vht', pa.float64(), True)) - new_columns.append( - pc.multiply( - pc.divide( - filtered_chunk['travelTimeInSecond'], pc.cast(pa.scalar(3.6e3), pa.float64()) - ), - observations_expansion - ) - ) - - # Create a new RecordBatch with additional columns - # new_schema = filtered_chunk.schema.append(new_columns[::2]) - # new_columns = filtered_chunk.columns + new_columns[1::2] - new_schema = filtered_chunk.schema - for field in new_fields: - new_schema = new_schema.append(field) - - new_columns = filtered_chunk.columns + new_columns - filtered_chunk = pa.RecordBatch.from_arrays(new_columns, schema=new_schema) - - # Convert to pandas - df_chunk = filtered_chunk.to_pandas() - # del filtered_chunk - - # Merge with vehicleTypes and network - df_chunk_merged = ( - df_chunk - .merge(vehicleTypes[['vehicleTypeId', 'class', 'beamFuel', 'emfacFuel', 'emfacId']], on='vehicleTypeId', how='left') - .merge(network[['linkId', 'linkLength']], on='linkId', how='left') - ) - # del df_chunk - - # Calculate annualHourlyMVMT - 
df_chunk_merged['vmt'] = (df_chunk_merged['linkLength'] * 6.21371192e-4) * observations_expansion - - # Rename column - df_chunk_merged.rename(columns={'emissionsProcess': 'process'}, inplace=True) - - # Melt the dataframe - id_vars = ['hour', 'linkId', 'tazId', 'emfacId', 'class', 'beamFuel', 'emfacFuel', 'process', 'kwh', 'vmt', 'vht'] - value_vars = [f'scaled_{pollutant}' for pollutant in pollutant_columns.keys()] - melted_chunk = df_chunk_merged.melt( - id_vars=id_vars, - value_vars=value_vars, - var_name='pollutant', - value_name='rate' - ) - # del df_chunk_merged - melted_chunk['pollutant'] = melted_chunk['pollutant'].str.replace('scaled_', '') - melted_chunk['scenario'] = scenario_name - - result_chunks.append(melted_chunk) - - # Update progress bar - pbar.update(chunk_size) - - # Close progress bar - pbar.close() - - # Combine all processed chunks - melted = pd.concat(result_chunks, ignore_index=True) - - end_time = time.time() - print(f"Time taken to read the file: {end_time - start_time:.2f} seconds to read file {skims_file}") - - return melted - - -def plot_hourly_emissions_by_scenario_class_fuel(emissions_skims, pollutant, output_dir, plot_legend, height_size, font_size): - data = emissions_skims[emissions_skims['pollutant'] == pollutant].copy() - grouped_data = data.groupby(['scenario', 'hour', 'class', 'emfacFuel'])['rate'].sum().reset_index() - - plt.figure(figsize=(20, height_size)) - - grouped_data['fuel_class'] = grouped_data['emfacFuel'].astype(str) + ', ' + grouped_data['class'].astype(str) - scenarios = grouped_data['scenario'].unique() - fuel_classes = sorted(grouped_data['fuel_class'].unique()) - all_hours = sorted(grouped_data['hour'].unique()) - - - # Create color map for fuel_classes - fuel_class_colors = {} - for fc in fuel_classes: - fuel, vehicle_class = fc.split(',') - fuel = fuel.strip() - vehicle_class = vehicle_class.strip() - base_color = fuel_color_map[fuel] # Default to black if fuel not found - if any(c in vehicle_class for c in 
['7', '8']): - fuel_class_colors[fc] = darken_color(base_color) - else: - fuel_class_colors[fc] = base_color - - x = np.arange(len(all_hours)) - width = 0.35 / len(scenarios) - - scenarios_labeling = [] - for i, scenario in enumerate(scenarios): - scenarios_labeling.append(scenario) - scenario_data = grouped_data[grouped_data['scenario'] == scenario] - bottom = np.zeros(len(all_hours)) - for fuel_class in fuel_classes: - fuel_class_data = scenario_data[scenario_data['fuel_class'] == fuel_class] - # Create an array of rates for all hours, filling with zeros where data is missing - rates = np.zeros(len(all_hours)) - for _, row in fuel_class_data.iterrows(): - hour_index = all_hours.index(row['hour']) - rates[hour_index] = row['rate'] - - # Add edgecolor and linewidth parameters to create a subtle border - plt.bar(x + i * width, rates, width, bottom=bottom, - label=f"{fuel_class}" if i == 0 else "", - color=fuel_class_colors[fuel_class], - edgecolor='black', # Add black edge color - linewidth=0.5) # Adjust linewidth as needed - bottom += rates - - plt.title( - f'{pollutant.replace("_", ".")} Emissions: {" vs. 
".join(scenarios_labeling)}', - fontsize=font_size+4) - plt.xlabel('Hour', fontsize=font_size) - plt.ylabel('Emissions (Metric Tons)', fontsize=font_size) - plt.xticks(x + width * (len(scenarios) - 1) / 2, all_hours, fontsize=font_size) - plt.yticks(fontsize=24) - if plot_legend: - plt.legend(title='Fuel, Class', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size+4, title_fontsize=font_size+4) - plt.grid(axis='y', linestyle='--', alpha=0.7) - - plt.tight_layout() - plt.savefig(f'{output_dir}/{pollutant.lower()}_emissions_by_scenario_hour_class_fuel.png', dpi=300, bbox_inches='tight') - - -def plot_hourly_activity(tours_types, output_dir, height_size): - # Preprocess data - tours_types['class'] = tours_types['vehicleCategory'].str.replace('Vocational|Tractor', '', regex=True).str.strip() - tours_types['fuel'] = tours_types['primaryFuelType'].str.lower().map(fuel_beam2emfac_map) - tours_types['fuel'] = np.where((tours_types['fuel'] == "Elec") & tours_types['secondaryFuelType'].notna(), 'Phe', - tours_types['fuel']) - tours_types['fuel_class'] = tours_types['fuel'] + '-' + tours_types['class'] - tours_types['departure_hour'] = (tours_types['departureTimeInSec'] / 3600).astype(int) % 24 - # Group by scenario, hour, and fuel_class, count the number of tours - hourly_activity = tours_types.groupby(['scenario', 'departure_hour', 'fuel_class']).size().unstack( - level=[0, 2], fill_value=0 - ) - - scenarios = tours_types['scenario'].unique() - # If the DataFrame is empty, create a default one with all hours - if hourly_activity.empty: - fuel_classes = tours_types['fuel_class'].unique() - index = pd.Index(range(24), name='departure_hour') - columns = pd.MultiIndex.from_product([scenarios, fuel_classes], names=['scenario', 'fuel_class']) - hourly_activity = pd.DataFrame(0, index=index, columns=columns) - else: - # Ensure all hours are present - for hour in range(24): - if hour not in hourly_activity.index: - hourly_activity.loc[hour] = 0 - hourly_activity = 
hourly_activity.sort_index() - - # Create the plot - plt.figure(figsize=(20, height_size)) - x = np.arange(24) # 24 hours - width = 0.35 # width of the bars - scenarios = hourly_activity.columns.levels[0] - - # Get all unique fuel classes across all scenarios - all_fuel_classes = set() - for scenario in scenarios: - all_fuel_classes.update(hourly_activity[scenario].columns) - - fuel_order = list(fuel_color_map.keys()) - # Sort fuel classes based on the defined order - sorted_fuel_classes = sorted(all_fuel_classes, - key=lambda x: ( - fuel_order.index(x.split('-')[0]) if x.split('-')[0] in fuel_order else len( - fuel_order), x)) - - # Create a color map for all fuel types - #color_map = {fuel: fuel_color_map[fuel] for fuel in fuel_order} - color_map = {} - for fc in sorted_fuel_classes: - fuel, vehicle_class = fc.split('-') - base_color = fuel_color_map[fuel] # Default to black if fuel not found - if any(c in vehicle_class for c in ['7', '8']): - color_map[fc] = darken_color(base_color) - else: - color_map[fc] = base_color - - print("Sorted fuel classes:", sorted_fuel_classes) - print("Color map:", color_map) - - # Plot stacked bars for each scenario - legend_handles = [] - legend_labels = [] - for i, scenario in enumerate(scenarios): - bottom = np.zeros(24) - for fuel_class in sorted_fuel_classes: - color = color_map[fuel_class] - - if fuel_class in hourly_activity[scenario].columns: - values = hourly_activity[scenario][fuel_class] - else: - values = np.zeros(24) - - bar = plt.bar(x + i * width, values, width, bottom=bottom, color=color, edgecolor='black', linewidth=0.5) - bottom += values - - if fuel_class not in legend_labels: - legend_handles.append(bar) - legend_labels.append(fuel_class) - - # plt.title(f'Weekday Tour Activity by Fuel, Class and Scenario: {" vs ".join(scenarios).replace("_", " ")}', fontsize=24) - plt.xlabel('Hour', fontsize=24) - plt.ylabel('Number of Tours Departing', fontsize=24) - plt.xticks(x + width / 2, range(24), fontsize=24) - 
plt.yticks(fontsize=12) - - # Create legend with ordered fuel classes - plt.legend(legend_handles, legend_labels, fontsize=28, loc='upper left', bbox_to_anchor=(1, 1)) - - plt.grid(axis='y', linestyle='--', alpha=0.7) - - # Adjust layout and save - plt.tight_layout() - plt.savefig(f'{output_dir}/hourly_activity_by_scenario_fuel_class.png', dpi=300, bbox_inches='tight') - plt.close() - - print(f"Plot saved as {output_dir}/hourly_activity_by_scenario_fuel_class.png") - - -def plot_hourly_vmt(df, output_dir, height_size): - # Preprocess the data - df['fuel_class'] = df['beamFuel'].astype(str) + '-' + df['class'].astype(str) - df['hour'] = df['hour'].astype(int) % 24 - df['mvmt'] = df['vmt'] / 1e6 - - scenarios = df['scenario'].unique() - - hourly_vmt = df.groupby(['scenario', 'hour', 'fuel_class'])['mvmt'].sum().unstack( - level=[0, 2], fill_value=0 - ).copy().reset_index() - - # Ensure all hours are present - for hour in range(24): - if hour not in hourly_vmt.index: - hourly_vmt.loc[hour] = 0 - hourly_vmt = hourly_vmt.sort_index() - - # Create the plot - plt.figure(figsize=(20, height_size)) - x = np.arange(24) # 24 hours - width = 0.35 # width of the bars - - # Get all unique fuel classes across all scenarios - all_fuel_classes = set() - for scenario in scenarios: - all_fuel_classes.update(hourly_vmt[scenario].columns) - - fuel_order = list(fuel_color_map.keys()) - # Sort fuel classes based on the defined order - sorted_fuel_classes = sorted(all_fuel_classes, - key=lambda x: ( - fuel_order.index(x.split('-')[0]) if x.split('-')[0] in fuel_order else len( - fuel_order), x)) - - - # Create color map for fuel_classes - color_map = {} - for fc in sorted_fuel_classes: - fuel, vehicle_class = fc.split('-') - base_color = fuel_color_map[fuel] # Default to black if fuel not found - if any(c in vehicle_class for c in ['7', '8']): - color_map[fc] = darken_color(base_color) - else: - color_map[fc] = base_color - - # Plot stacked bars for each scenario - legend_handles = [] - 
legend_labels = [] - for i, scenario in enumerate(scenarios): - bottom = np.zeros(24) - for fuel_class in sorted_fuel_classes: - if fuel_class in hourly_vmt[scenario].columns: - values = hourly_vmt[scenario][fuel_class] - else: - values = np.zeros(24) - - bar = plt.bar(x + i * width, values, width, bottom=bottom, color=color_map[fuel_class], edgecolor='black', linewidth=0.5) - bottom += values - - if fuel_class not in legend_labels: - legend_handles.append(bar) - legend_labels.append(fuel_class) - - # plt.title(f'Weekday VMT by Fuel, Class and Scenario: {" vs ".join(scenarios).replace("_", " ")}', fontsize=20) - plt.xlabel('Hour', fontsize=24) - plt.ylabel('Million Vehicle Miles Traveled', fontsize=24) - plt.xticks(x + width / 2, range(24), fontsize=24) - plt.yticks(fontsize=24) - - # Create legend with ordered fuel classes - plt.legend(legend_handles, legend_labels, title='Fuel, Class', fontsize=28, loc='upper left', bbox_to_anchor=(1, 1)) - plt.grid(axis='y', linestyle='--', alpha=0.7) - - # Adjust layout and save - plt.tight_layout() - plt.savefig(f'{output_dir}/hourly_vmt_by_scenario_fuel_class.png', dpi=300, bbox_inches='tight') - plt.close() - - print(f"Hourly VMT plot saved as {output_dir}/hourly_vmt_by_scenario_fuel_class.png") - - -def generate_h3_intersections(network_df, resolution, output_dir): - print(f"Initial network_df shape: {network_df.shape}") - - # Remove rows with NaN values in coordinate columns - coord_columns = ['fromLocationX', 'fromLocationY', 'toLocationX', 'toLocationY'] - network_clean = network_df.dropna(subset=coord_columns) - print(f"Clean network_df shape: {network_clean.shape}") - - # Create bounding box - lats = network_clean[['fromLocationY', 'toLocationY']].values.flatten() - lons = network_clean[['fromLocationX', 'toLocationX']].values.flatten() - bbox = [[ - [min(lats), min(lons)], - [min(lats), max(lons)], - [max(lats), max(lons)], - [max(lats), min(lons)], - [min(lats), min(lons)] # Close the polygon - ]] - - # Generate H3 
cells - h3_cells = list(h3.polyfill({'type': 'Polygon', 'coordinates': bbox}, resolution)) - print(f"Number of H3 cells: {len(h3_cells)}") - - if len(h3_cells) == 0: - print("No H3 cells created. Check your bounding box and resolution.") - return pd.DataFrame() - - # Create GeoDataFrame of H3 cells - h3_gdf = gpd.GeoDataFrame( - {'h3_cell': h3_cells}, - geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3_cells], - crs="EPSG:4326" - ) - - # Create network GeoDataFrame - def create_linestring(row): - return LineString([(row['fromLocationX'], row['fromLocationY']), - (row['toLocationX'], row['toLocationY'])]) - - network_gdf = gpd.GeoDataFrame( - network_clean, - geometry=network_clean.apply(create_linestring, axis=1), - crs="EPSG:4326" - ) - - # Spatial join - joined = gpd.sjoin(h3_gdf, network_gdf, how="inner", predicate="intersects") - print(f"Joined DataFrame shape after spatial join: {joined.shape}") - - if joined.empty: - print("No intersections found between H3 cells and network geometries.") - return pd.DataFrame() - - # Calculate intersections and lengths - def calculate_intersection(row): - try: - h3_poly = Polygon(h3.h3_to_geo_boundary(row['h3_cell'], geo_json=True)) - line = row['geometry'] - intersection = h3_poly.intersection(line) - return pd.Series({'intersection_length': intersection.length}) - except Exception as e: - print(f"Error in calculate_intersection: {e}") - return pd.Series({'intersection_length': 0}) - - tqdm.pandas(desc="Calculating intersections") - joined['intersection_length'] = joined.progress_apply(calculate_intersection, axis=1) - - # Calculate length ratios - joined['length_ratio'] = joined['intersection_length'] / joined['linkLength'] - - # Keep only necessary columns - intersection_df = joined[['h3_cell', 'linkId', 'length_ratio']] - - intersection_df.to_csv(f'{output_dir}/network.h3.csv', index=False) - - return intersection_df - - -def process_h3_data(h3_df, data_df, data_col): - print(f"Initial emissions_df 
shape: {data_df.shape}") - - # Filter emissions data for the specific pollutant - data_df[data_col] = pd.to_numeric(data_df[data_col], errors='coerce') - data_df_filtered = data_df.dropna() - print(f"Filtered emissions shape: {data_df_filtered.shape}") - - # Merge with intersection data - merged = pd.merge(h3_df, data_df_filtered, on='linkId', how='inner') - print(f"Merged DataFrame shape: {merged.shape}") - - # Calculate normalized emissions - merged[f'weighted_{data_col}'] = merged[data_col] * merged['length_ratio'] - - # Group by H3 cell and sum normalized emissions - result = merged.groupby(['scenario', 'h3_cell'])[f'weighted_{data_col}'].sum().reset_index() - print(f"Final result shape: {result.shape}") - return result - - -def process_h3_emissions(emissions_df, intersection_df, pollutant): - print(f"Initial emissions_df shape: {emissions_df.shape}") - - # Filter emissions data for the specific pollutant - filtered_emissions = emissions_df[emissions_df['pollutant'] == pollutant][['scenario', 'linkId', 'rate']] - filtered_emissions['rate'] = pd.to_numeric(filtered_emissions['rate'], errors='coerce') - filtered_emissions = filtered_emissions.dropna() - print(f"Filtered emissions shape: {filtered_emissions.shape}") - - # Merge with intersection data - merged = pd.merge(intersection_df, filtered_emissions, on='linkId', how='inner') - print(f"Merged DataFrame shape: {merged.shape}") - - # Calculate normalized emissions - merged[f'{pollutant}'] = merged['rate'] * merged['length_ratio'] - - # Group by H3 cell and sum normalized emissions - result = merged.groupby(['scenario', 'h3_cell'])[f'{pollutant}'].sum().reset_index() - print(f"Final result shape: {result.shape}") - return result - - -def plot_h3_heatmap(df, df_col, scenario, output_dir, is_delta, remove_outliers, in_log_scale): - """Create a heatmap using the H3 grid structure with linear or logarithmic color scale and a base map.""" - subset_df = df[df["scenario"] == scenario] - if remove_outliers: - subset_df 
= remove_outliers_zscore(subset_df, df_col) - - # Create polygons for all H3 cells in the result - polygons = [Polygon(h3.h3_to_geo_boundary(h3_cell, geo_json=True)) for h3_cell in subset_df['h3_cell']] - - # Create GeoDataFrame - gdf = gpd.GeoDataFrame({ - 'h3_cell': subset_df['h3_cell'], - 'h3_var': subset_df[df_col], - 'geometry': polygons - }) - gdf = gdf.set_crs("EPSG:4326") - - # Convert to Web Mercator projection for compatibility with contextily - gdf_mercator = gdf.to_crs(epsg=3857) - - # Create figure and axis - fig, ax = plt.subplots(figsize=(15, 10)) - - vmin, vmax = gdf_mercator['h3_var'].min(), gdf_mercator['h3_var'].max() - - if in_log_scale: - if is_delta: - norm = mcolors.SymLogNorm(linthresh=1e-5, vmin=vmin, vmax=vmax) - else: - gdf_mercator = gdf_mercator[gdf_mercator['h3_var'] > 0] - vmin, vmax = gdf_mercator['h3_var'].min(), gdf_mercator['h3_var'].max() - norm = LogNorm(vmin=vmin, vmax=vmax) - label_suffix = "in log scale" - file_suffix = "log" - else: - if is_delta: - norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax) - else: - norm = None - label_suffix = "" - file_suffix = "linear" - - # Choose colormap based on whether it's a delta calculation - if is_delta: - cmap = mcolors.LinearSegmentedColormap.from_list("", ["blue", "lightblue", "white", "pink", "red"]) - else: - cmap = plt.get_cmap('viridis') - - # Plot cells with data - gdf_mercator.plot(column='h3_var', ax=ax, legend=False, cmap=cmap, edgecolor='none', norm=norm, alpha=0.7) - - # Add base map - cx.add_basemap(ax, source=cx.providers.CartoDB.Positron) - - # Add colorbar - sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm) - sm.set_array([]) - if is_delta: - cbar = fig.colorbar(sm, ax=ax, extend='both') - else: - cbar = fig.colorbar(sm, ax=ax, extend='max') - - cbar.ax.tick_params(labelsize=14) - cbar.set_label(f'{df_col.replace("_", ".")} {label_suffix}', rotation=270, labelpad=15, fontsize=18) - - # Set title and adjust plot - # plt.title(f'Emissions Distribution of 
{df_col.replace("_", ".")}, {scenario} ', fontsize=16) - ax.set_axis_off() - plt.tight_layout() - - # Save figure - outlier_status = "no_outliers" if remove_outliers else "with_outliers" - file_name = f'{output_dir}/{df_col.replace(" ", "_").lower()}_{scenario.replace(" ", "_").lower()}_heatmap_{file_suffix}_{outlier_status}_with_basemap.png' - plt.savefig(file_name, dpi=300, bbox_inches='tight') - plt.close() - print(f"Heatmap with base map saved as {file_name}") - - -def create_h3_histogram(df, output_dir, pollutant, scenario, remove_outliers, in_log_scale): - subset_df = df[df["scenario"] == scenario] - if remove_outliers: - subset_df = remove_outliers_zscore(subset_df, pollutant) - # Extract pollutant values - pollutant_values = subset_df[pollutant].values - - # Create the histogram - plt.figure(figsize=(12, 6)) - - if in_log_scale: - # Use log-spaced bins, but with adjustments for potential zero values - bins = np.logspace(np.log10(pollutant_values.min() + 1e-10), - np.log10(pollutant_values.max()), - num=50) - x_label = f'{pollutant.replace("_", ".")} Emissions (log scale)' - title_label = f'Histogram of {pollutant.replace("_", ".")} Emissions by H3 Cell (Log Scale)' - file_name = f'{output_dir}/{pollutant}_{scenario.replace(" ","_").lower()}_emissions_histogram_log.png' - else: - # Use automatic binning based on Sturges' rule - bins = 'sturges' - x_label = f'{pollutant.replace("_", ".")} Emissions' - title_label = f'Histogram of {pollutant.replace("_", ".")} Emissions by H3 Cell' - file_name = f'{output_dir}/{pollutant}_{scenario.replace(" ","_").lower()}_emissions_histogram.png' - - plt.hist(pollutant_values, bins=bins, edgecolor='black') - - # Set x-axis to log scale if specified - if in_log_scale: - plt.xscale('log') - - # Set labels and title - plt.xlabel(x_label, fontsize=12) - plt.ylabel('Frequency', fontsize=12) - plt.title(title_label, fontsize=14) - - # Add grid for better readability - plt.grid(True, linestyle='--', alpha=0.7) - - # Adjust layout 
and save - plt.tight_layout() - plt.savefig(file_name, dpi=300, bbox_inches='tight') - plt.close() - print(f"Histogram saved as {file_name}/") - - -def remove_outliers_zscore(df, column, threshold=3): - mean = df[column].mean() - std = df[column].std() - z_scores = np.abs((df[column] - mean) / std) - df_filtered = df[z_scores < threshold].copy() - removed_rows = df[~df.index.isin(df_filtered.index)] - summary_df = pd.DataFrame({ - 'column': [column], - 'mean': [mean], - 'std': [std], - 'num_outliers': [len(removed_rows)] - }) - print(summary_df) - print(removed_rows) - return df_filtered - - -def fast_df_to_gzip(df, output_file, compression_level=5, chunksize=100000): - """ - Write a pandas DataFrame to a compressed CSV.gz file quickly with a progress bar. - - :param df: pandas DataFrame to write - :param output_file: path to the output .csv.gz file - :param compression_level: gzip compression level (1-9, 9 being highest) - :param chunksize: number of rows to write at a time - """ - total_rows = len(df) - - with gzip.open(output_file, 'wt', compresslevel=compression_level) as gz_file: - # Write header - gz_file.write(','.join(df.columns) + '\n') - - # Write data in chunks - with tqdm(total=total_rows, desc="Writing to gzip", unit="rows") as pbar: - for start in range(0, total_rows, chunksize): - end = min(start + chunksize, total_rows) - chunk = df.iloc[start:end] - - csv_buffer = io.StringIO() - chunk.to_csv(csv_buffer, index=False, header=False) - gz_file.write(csv_buffer.getvalue()) - - pbar.update(end - start) - - -def create_model_vmt_comparison_chart(emfac_vmt_file, emfac_area, emfac_scenario, skims_data, famos_scenario, output_dir): - df = pd.read_csv(emfac_vmt_file) - _, ft_emfac_class_map = create_vehicle_class_mapping(df["vehicle_class"].unique()) - filtered_df = df[ - (df['calendar_year'] == emfac_scenario) & - (df['sub_area'].str.contains(f'\({region_to_emfac_area[emfac_area]}\)')) & - (df['vehicle_class'].map(ft_emfac_class_map))].copy() - 
filtered_df["class"] = df['vehicle_class'].map(ft_emfac_class_map).map( - { - 'Class 4-6 Vocational': 'Class456', - 'Class 7&8 Vocational': 'Class78', - 'Class 7&8 Tractor': 'Class78' - } - ) - filtered_df["fuel_class"] = filtered_df["fuel"] + "-" + filtered_df["class"] - emfac_vmt = filtered_df.groupby(["fuel_class"])["total_vmt"].sum().reset_index() - emfac_vmt.rename(columns={'total_vmt': 'mvmt'}, inplace=True) - emfac_vmt["model"] = "emfac" - famos_vmt = skims_data[skims_data["scenario"] == famos_scenario].groupby( - ["class", "beamFuel"] - )["vmt"].sum().reset_index() - famos_vmt["fuel_class"] = famos_vmt["beamFuel"] + "-" + famos_vmt["class"] - famos_vmt = famos_vmt[["fuel_class", "vmt"]].copy() - famos_vmt.rename(columns={'vmt': 'mvmt'}, inplace=True) - famos_vmt["model"] = "famos" - emfac_famos_vmt = pd.concat([emfac_vmt, famos_vmt], axis=0) - emfac_famos_vmt.to_csv(f"{output_dir}/emfac_famos_vmt_by_fuel_class.csv") - return emfac_famos_vmt - - -def plot_multi_pie_emfac_famos_vmt(data, plot_dir): - def assign_color(fuel_class): - return fuel_color_map[fuel_class.split('-')[0]] - - models = data["model"].unique() - - emfac_data = data[data['model'] == 'emfac'].sort_values('mvmt', ascending=False) - famos_data = data[data['model'] == 'famos'].sort_values('mvmt', ascending=False) - - all_fuel_classes = set(emfac_data['fuel_class']) | set(famos_data['fuel_class']) - for fuel_class in all_fuel_classes: - if fuel_class not in emfac_data['fuel_class'].values: - emfac_data = pd.concat( - [emfac_data, pd.DataFrame({'fuel_class': [fuel_class], 'model': ['EMFAC'], 'mvmt': [0]})], - ignore_index=True) - if fuel_class not in famos_data['fuel_class'].values: - famos_data = pd.concat( - [famos_data, pd.DataFrame({'fuel_class': [fuel_class], 'model': ['FAMOS'], 'mvmt': [0]})], - ignore_index=True) - - emfac_data = emfac_data.sort_values('fuel_class') - famos_data = famos_data.sort_values('fuel_class') - - if emfac_data['mvmt'].sum() == 0 and famos_data['mvmt'].sum() == 0: 
- print("Error: All VMT values are zero. Cannot create pie chart.") - return - - fig, ax = plt.subplots(figsize=(14, 10)) - size = 0.3 - outer_radius = 1 - inner_radius = outer_radius - size - outer_colors = [assign_color(fuel_class) for fuel_class in famos_data['fuel_class']] - inner_colors = [assign_color(fuel_class) for fuel_class in emfac_data['fuel_class']] - - def make_autopct(values): - def my_autopct(pct): - return f'{pct:.1f}%' if pct >= 1 else '' - - return my_autopct - - def add_labels(wedges, fuel_classes, autopct, colors, radius, inner=False): - for wedge, fuel_class, color in zip(wedges, fuel_classes, colors): - ang = (wedge.theta2 + wedge.theta1) / 2 - pct = wedge.theta2 - wedge.theta1 - if pct * 100 / 360 >= 1: # Only show labels for slices >= 1% - label = autopct(pct * 100 / 360) - theta = np.deg2rad(ang) - - if inner: - start_point = ((inner_radius - size) * np.cos(theta), (inner_radius - size) * np.sin(theta)) - end_point = (0.4 * np.cos(theta), 0.4 * np.sin(theta)) - - bbox_props = dict(boxstyle="round,pad=0.3", fc=color, ec="k", lw=0.72, alpha=0.7) - arrowprops = dict(arrowstyle="-", connectionstyle=f"arc3,rad=0", color='k') - - ax.annotate(f'{fuel_class}\n{label}', xy=start_point, xytext=end_point, - horizontalalignment='center', - verticalalignment='center', - bbox=bbox_props, arrowprops=arrowprops, - fontsize=16) - else: - x = (radius + size / 2 + 0.05) * np.cos(theta) - y = (radius + size / 2 + 0.05) * np.sin(theta) - - bbox_props = dict(boxstyle="round,pad=0.3", fc=color, ec="k", lw=0.72, alpha=0.7) - ax.annotate(f'{fuel_class}\n{label}', xy=(x, y), xytext=(x, y), - horizontalalignment='center', - verticalalignment='center', - bbox=bbox_props, - fontsize=16) - - wedges_outer, texts_outer, autotexts_outer = ax.pie(famos_data['mvmt'], radius=outer_radius, colors=outer_colors, - labels=None, autopct='', pctdistance=0.85, - labeldistance=1.1, - wedgeprops=dict(width=size, edgecolor='white')) - - add_labels(wedges_outer, 
famos_data['fuel_class'], make_autopct(famos_data['mvmt']), outer_colors, outer_radius) - - wedges_inner, texts_inner, autotexts_inner = ax.pie(emfac_data['mvmt'], radius=inner_radius, colors=inner_colors, - labels=None, autopct='', pctdistance=0.75, - wedgeprops=dict(width=size, edgecolor='white')) - - add_labels(wedges_inner, emfac_data['fuel_class'], make_autopct(emfac_data['mvmt']), inner_colors, inner_radius, inner=True) - - # ax.set_title('VMT Share by Fuel-Class: FAMOS (outer) vs EMFAC (inner)', fontsize=16) - - # handles = [plt.Rectangle((0, 0), 1, 1, fc="w", ec="k", lw=2, alpha=0.5) for _ in range(2)] - # labels = ['FAMOS (Outer)', 'EMFAC (Inner)'] - # ax.legend(handles, labels, title="Models", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) - - plt.tight_layout() - output_file = os.path.join(plot_dir, f"{'_'.join(models)}_vmt_multi_level_pie_chart.png") - plt.savefig(output_file, bbox_inches='tight', dpi=300) - plt.close() - print(f"Chart has been saved as '{output_file}'") - - -def plot_pollution_variability_by_process_vehicle_types(skims, pollutant, scenario, output_dir, height_size, font_size): - warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn") - # Filter data for specified scenario and pollutant - data = skims[(skims['scenario'] == scenario) & (skims['pollutant'] == pollutant)].copy() - processes = sorted(skims["process"].unique().tolist()) - - # Create fuel_class category - data['fuel_class'] = data['emfacFuel'].astype(str) + ', ' + data['class'].astype(str) - data['rate_micro_gram'] = data['rate'] * 1e12 - - # Sort fuel_class by median emission rate - fuel_class_order = data.groupby('fuel_class')['rate_micro_gram'].median().sort_values(ascending=False).index - - # Set up the plot - fig, ax = plt.subplots(figsize=(20, height_size)) - - # Create color map for fuel_classes - fuel_class_colors = {} - for fc in data['fuel_class'].unique(): - fuel, vehicle_class = fc.split(',') - fuel = fuel.strip() - vehicle_class = 
vehicle_class.strip() - base_color = fuel_color_map[fuel] # Default to black if fuel not found - if any(c in vehicle_class for c in ['7', '8']): - fuel_class_colors[fc] = darken_color(base_color) - else: - fuel_class_colors[fc] = base_color - - # Create the box plot with adjusted parameters - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=FutureWarning) - sns.boxplot(x='process', y='rate_micro_gram', hue='fuel_class', data=data, - order=processes, hue_order=fuel_class_order, - palette=fuel_class_colors, - ax=ax, whis=1.5, fliersize=2, showcaps=True, showfliers=True) - - # Add strip plot for additional data points - sns.stripplot(x='process', y='rate_micro_gram', hue='fuel_class', data=data, - order=processes, hue_order=fuel_class_order, - palette=fuel_class_colors, - ax=ax, size=1, jitter=True, dodge=True, alpha=0.3) - - # Customize the plot - ax.set_title(f'{pollutant.replace("_", ".")} Emissions Variability - {scenario}', fontsize=font_size+4) - ax.set_xlabel('Process', fontsize=font_size) - ax.set_ylabel('Microgram per road link', fontsize=font_size) - ax.tick_params(axis='both', which='major', labelsize=font_size) - - # Rotate x-axis labels if needed - plt.setp(ax.get_xticklabels(), rotation=0, ha='right') - - # Move the legend outside the plot - ax.legend(title='Fuel, Class', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size) - - # Use log scale for y-axis if the range of values is large - min_rate = data['rate_micro_gram'].min() - max_rate = data['rate_micro_gram'].max() - - if min_rate <= 0: - print(f"Warning: Minimum rate is {min_rate}, which is zero or negative. Using log scale by default.") - ax.set_yscale('log') - # Set a small positive value for the bottom of the y-axis - ax.set_ylim(bottom=1e-10) # You might need to adjust this value - scale_label = "log" - elif max_rate / min_rate > 1000: - print(f"Using log scale. 
Max/min ratio: {max_rate/min_rate}") - ax.set_yscale('log') - scale_label = "log" - else: - print(f"Using linear scale. Max/min ratio: {max_rate/min_rate}") - scale_label = "linear" - - plt.tight_layout() - plt.savefig(f'{output_dir}/{pollutant.lower()}_variability_by_process_fuel_class_{scenario.replace(" ", "_").lower()}_{scale_label}_scale.png', dpi=300, bbox_inches='tight') - plt.close() - - -def plot_pollutants_by_process(skims, scenario, plot_dir, height_size, font_size): - # Define process order and color map based on toxicity - process_order = list(process_color_map.keys()) - # Group by pollutant and process, and sum the rates - grouped = skims[skims["scenario"] == scenario].groupby(['pollutant', 'process'])['rate'].sum().unstack() - - # Reorder columns based on process_order - grouped = grouped.reindex(columns=process_order) - - # Normalize the data - normalized = grouped.div(grouped.sum(axis=1), axis=0) - normalized = normalized * 100 - - # Create the stacked bar plot - fig, ax = plt.subplots(figsize=(20, height_size)) - normalized.plot(kind='bar', stacked=True, ax=ax, color=[process_color_map[col] for col in normalized.columns]) - - # Customize the plot - plt.title(f'Normalized Emissions by Process - {scenario}', fontsize=font_size+4) - plt.xlabel('Pollutant', fontsize=font_size) - plt.ylabel('Relative Emissions (%)', fontsize=font_size) - plt.xticks(rotation=0, ha='center', fontsize=font_size) - plt.yticks(fontsize=font_size) - - ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0f}%'.format(y))) - ax.set_ylim(0, 100) - - legend = plt.legend(title='Process', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=font_size) - plt.setp(legend.get_title(), fontsize=font_size) - plt.tight_layout() - - # Save the plot - plt.savefig( - f'{plot_dir}/pollutant_by_process_{scenario.replace(" ", "_").lower()}.png', - dpi=300, - bbox_inches='tight' - ) - - # Show the plot - plt.show() diff --git 
def create_emfac_id(row):
    """Build the EMFAC identifier for one row.

    The identifier is the concatenation, without any separator, of the
    sanitized ``model_year_group``, ``vehicle_class`` and ``fuel`` values of
    ``row`` (in that order), with every underscore removed from each part.
    """
    id_parts = (
        sanitize_name(row[column]).replace("_", "")
        for column in ('model_year_group', 'vehicle_class', 'fuel')
    )
    return "".join(id_parts)
def generate_emfac_beam_class_mapping(_study_area, _scenario_name, _work_dir, _config, to_filter_out):
    """
    Build (or load) the EMFAC vehicle class -> BEAM class mapping.

    If the JSON mapping file for this study area / scenario already exists
    under the configured rates output directory, its content is loaded and
    returned unchanged. Otherwise the mapping is derived from the distinct
    ``vehicle_class`` values of the EMFAC population-by-model-year CSV, and
    the resulting grouping is printed.

    NOTE(review): this function only reads the JSON file; nothing here writes
    the mapping to disk. Confirm whether the file is produced elsewhere.

    Args:
        _study_area: Study area name (part of the mapping file name).
        _scenario_name: Scenario name (part of the mapping file name).
        _work_dir: Base directory the configured relative paths are joined to.
        _config: Configuration dictionary; ``rates.output_dir`` and
            ``rates.emfac.emfac_pop_by_model_year_file`` are read.
        to_filter_out: Collection of BEAM classes to exclude; EMFAC classes
            that map to one of them are treated as not matched.

    Returns:
        dict: EMFAC vehicle class -> BEAM class without the unmatched entries
        (or the content of the existing JSON file).
    """
    # Single-quoted keys keep this f-string valid on Python versions before 3.12.
    mapping_file_name = (
        f"{_config['rates']['output_dir']}/{_study_area}_vehicle_class_mapping_{_scenario_name}.json"
    )
    _vehicle_class_output_file = os.path.join(_work_dir, mapping_file_name)

    # Reuse a previously stored mapping when there is one
    if os.path.exists(_vehicle_class_output_file):
        print(f"File {_vehicle_class_output_file} already exists. Loading existing mapping.")
        with open(_vehicle_class_output_file, 'r') as f:
            return json.load(f)

    not_matched = "NotMatched"
    # Any EMFAC class whose name contains one of these markers is never matched.
    excluded_markers = ('Utility', 'Public', 'Port', 'POLA', 'POAK', 'SWCV', 'PTO', 'T6TS')

    def classify(vehicle):
        # The order of the checks matters: exclusions win over every other rule.
        if any(marker in vehicle for marker in excluded_markers):
            return not_matched
        if vehicle in ['LDA', 'LDT1', 'LDT2', 'MDV']:
            return BeamClasses.CLASS_CAR
        if vehicle in ['MCY']:
            return BeamClasses.CLASS_BIKE
        if vehicle in ['UBUS']:
            return BeamClasses.CLASS_MDP
        if 'LHD' in vehicle:
            return BeamClasses.CLASS_2B3_VOCATIONAL
        if 'Class 4' in vehicle or 'Class 5' in vehicle or 'Class 6' in vehicle:
            return BeamClasses.CLASS_456_VOCATIONAL
        if 'Class 7' in vehicle or 'Class 8' in vehicle:
            if 'Tractor' in vehicle or 'CAIRP' in vehicle:
                return BeamClasses.CLASS_78_TRACTOR
            return BeamClasses.CLASS_78_VOCATIONAL
        if "T7IS" in vehicle:
            return BeamClasses.CLASS_78_TRACTOR
        return not_matched

    table = csv.read_csv(
        os.path.join(_work_dir, _config["rates"]["emfac"]["emfac_pop_by_model_year_file"]),
        read_options=pa.csv.ReadOptions(use_threads=True)
    )
    df = table.to_pandas()

    mapping = {}
    class_groups = defaultdict(list)
    for vehicle in df["vehicle_class"].unique():
        beam_class = classify(vehicle)
        # Classes the caller asked to filter out are handled like unmatched ones
        if beam_class in to_filter_out:
            beam_class = not_matched
        mapping[vehicle] = beam_class
        class_groups[beam_class].append(vehicle)

    # Print category groupings
    for vehicle_class, vehicles in class_groups.items():
        print(f"Category: {vehicle_class}")
        for vehicle in vehicles:
            print(f"  - {vehicle}")

    return {k: v for k, v in mapping.items() if v != not_matched}
def prepare_emissions_data_for_mapping(area, scenario, work_dir, config):
    """
    Load the EMFAC population, VMT and emissions-rate tables for a scenario.

    Each table is produced by the corresponding ``process_emfac_*`` /
    ``process_emissions_rates`` helper, which receives ``format_emissions_data``
    as its formatting callback. Population and VMT are then left-joined on
    ``emfacId``.

    Args:
        area: Study area name, forwarded to the processing helpers.
        scenario: Scenario name, forwarded to the processing helpers.
        work_dir: Working directory, forwarded to the processing helpers.
        config: Configuration dictionary; ``config["mapping"]`` provides the
            class and fuel mappings keyed by ``emfac-ft``, ``emfac-pax`` and
            ``emfac-bus``.

    Returns:
        tuple: ``(emfac_fleet, emfac_rates)`` where ``emfac_fleet`` is the
        population table with ``total_vmt`` and ``vmt_proportion`` attached.
    """
    mapping_config = config["mapping"]

    def categorize_model_year(year):
        # https://pubs.acs.org/doi/full/10.1021/acs.est.9b04763
        if year <= 1993:
            return '1993'
        elif year <= 2006:
            return '2006'
        else:
            return '2018'

    def format_emissions_data(emfac_types: pd.DataFrame) -> pd.DataFrame:
        # The same class/fuel mapping is applied once per EMFAC segment
        # (freight, passenger, bus); rows a segment cannot map are dropped
        # from that segment's frame only.
        mapped_frames = []
        for segment in ("emfac-ft", "emfac-pax", "emfac-bus"):
            segment_df = emfac_types.copy()
            segment_df['mappedClass'] = segment_df['vehicle_class'].map(mapping_config["class"][segment])
            segment_df.dropna(subset=['mappedClass'], inplace=True)
            segment_df['mappedFuel'] = segment_df['fuel'].map(mapping_config["fuel"][segment])
            segment_df.dropna(subset=['mappedFuel'], inplace=True)
            mapped_frames.append(segment_df)

        result_df = pd.concat(mapped_frames)

        result_df['model_year_group'] = result_df['model_year'].apply(categorize_model_year)
        # sub_area has the form "<county> (<area>)"
        result_df[['county', 'area']] = result_df['sub_area'].str.extract(r'^([^()]+)\s*\(([^)]+)\)')
        result_df['county'] = result_df['county'].str.strip().str.lower()
        result_df['area'] = result_df['area'].str.strip()
        result_df['emfacId'] = result_df.apply(create_emfac_id, axis=1)
        return result_df

    emfac_pop = process_emfac_population(area, scenario, work_dir, config, format_emissions_data)
    print("\n=== EMFAC Population ===\n")
    # Single-quoted keys keep these f-strings valid on Python versions before 3.12.
    print(f"total_population: {emfac_pop['population'].sum() / 1_000_000:.1f}M")

    print("\n=== EMFAC VMT ===\n")
    emfac_vmt = process_emfac_vmt(area, scenario, work_dir, config, format_emissions_data)
    print(f"total_vmt: {emfac_vmt['total_vmt'].sum() / 1_000_000:.1f}M")

    emfac_fleet = pd.merge(emfac_pop, emfac_vmt[["emfacId", "total_vmt", "vmt_proportion"]], on='emfacId', how='left')

    print("\n=== CARB Emissions Rates ===\n")
    emfac_rates = process_emissions_rates(area, scenario, work_dir, config, format_emissions_data)
    print(f"rates: {len(emfac_rates):,}")

    return emfac_fleet, emfac_rates
def assign_emission_rates_to_vehicle_types(scenario, emissions_rates, emfac_fleet, work_dir, config):
    """
    Process freight and passenger vehicle emissions by assigning EMFAC IDs and emissions rates.

    This function:
    1. Builds new freight vehicle types and assigns them to carriers
       (or reloads the previously written ``--EM.csv`` file)
    2. Creates or loads passenger vehicle types
    3. Writes one emissions-rates CSV per EMFAC id and records its relative
       path in the ``emissionsRatesFile`` column of every vehicle type

    Args:
        scenario (str): Scenario name
        emissions_rates (DataFrame): DataFrame containing emissions rates
        emfac_fleet (DataFrame): DataFrame containing EMFAC population and VMT data
        work_dir (str): Working directory for file operations
        config (dict): Configuration dictionary; reads ``config["beam"]``,
            ``config["mapping"]["fuel"]["beam"]`` and ``config["override_fleet"]``

    Returns:
        None: Files are saved to disk
    """

    def format_beam_vehicle_types(vehicle_types: pd.DataFrame) -> pd.DataFrame:
        # Adds fuel_key / mappedFuel / mappedClass columns to a copy of the input
        result_df = vehicle_types.copy()
        result_df['fuel_key'] = result_df.apply(get_fuel_key, axis=1)
        result_df['mappedFuel'] = result_df['fuel_key'].map(config["mapping"]["fuel"]["beam"])
        na_count = result_df['mappedFuel'].isna().sum()
        if na_count > 0:
            logging.warning(f"{na_count} vehicle types could not be mapped to EMFAC fuel types")
        result_df['mappedClass'] = result_df['vehicleCategory']
        return result_df

    def emissions_variant_path(beam_key):
        # "<name>.csv" -> "<work_dir>/<name>--EM.csv"
        return os.path.join(work_dir, config['beam'][beam_key].replace('.csv', '--EM.csv'))

    print("\n=== Map EMFAC To BEAM Population ===\n")

    # Define output file paths
    carriers_out_file = emissions_variant_path('carriers_file')
    ft_vehtypes_out_file = emissions_variant_path('ft_vehicle_types_file')
    pax_vehtypes_out_file = emissions_variant_path('pax_vehicle_types_file')
    # Path of the rates directory relative to the vehicle types directory; this is
    # also the prefix stored in the emissionsRatesFile column.
    rates_relative_dir = f"emissions/{scenario.replace('_', '-')}"
    emissions_rates_dir = os.path.join(
        os.path.dirname(os.path.join(work_dir, config['beam']['ft_vehicle_types_file'])),
        rates_relative_dir
    )

    # Process freight vehicles
    if check_files([carriers_out_file, ft_vehtypes_out_file], config["override_fleet"]):
        logging.info("All carriers and freight vehicle types emissions files have already been created")
        logging.info(f"  carriers: {carriers_out_file}")
        logging.info(f"  freight vehicle types: {ft_vehtypes_out_file}")
        new_ft_vehicle_types = pd.read_csv(ft_vehtypes_out_file)
    else:
        new_carriers, new_ft_vehicle_types = generate_emfac_mapped_freight_fleet(
            emfac_fleet, BeamClasses.get_freight_classes(), work_dir, config, format_beam_vehicle_types
        )
        logging.info(f"Saving updated files to:\n  {carriers_out_file}\n  {ft_vehtypes_out_file}")
        new_ft_vehicle_types.to_csv(ft_vehtypes_out_file, index=False)
        new_carriers.to_csv(carriers_out_file, index=False)

    # Process passenger vehicles
    if check_files([pax_vehtypes_out_file], config["override_fleet"]):
        logging.info("Passenger vehicle types emissions files have already been created:")
        logging.info(f"  passenger vehicle types: {pax_vehtypes_out_file}")
        new_pax_vehicle_types = pd.read_csv(pax_vehtypes_out_file)
        original_pax_types = pd.read_csv(os.path.join(work_dir, config['beam']['pax_vehicle_types_file']))
        # NOTE(review): this keeps the rows whose category IS a freight class or one of the
        # reloaded categories; confirm whether the complement (~isin) was intended.
        other_pax_vehicle_types = original_pax_types[original_pax_types["vehicleCategory"].isin(
            BeamClasses.get_freight_classes() + new_pax_vehicle_types["vehicleCategory"].unique().tolist())]
    else:
        # Generate passenger vehicle types
        new_pax_vehicle_types, other_pax_vehicle_types = generate_emfac_mapped_passenger_vehicle_types(
            emfac_fleet,
            car_class=BeamClasses.CLASS_CAR,
            bike_class=BeamClasses.CLASS_BIKE,
            transit_class=BeamClasses.CLASS_MDP,
            filter_out_classes=BeamClasses.get_freight_classes(),
            work_dir=work_dir,
            config=config,
            format_func=format_beam_vehicle_types,
        )

    # vehicles_output already contains work_dir, so it is used as-is for writing.
    vehicles_output = emissions_variant_path('pax_vehicles_file')
    if not check_files([vehicles_output], config["override_fleet"]):
        pax_vehicles = generate_fleet_from_vehicle_types(
            new_pax_vehicle_types,
            car_class=BeamClasses.CLASS_CAR,
            bike_class=BeamClasses.CLASS_BIKE,
            work_dir=work_dir,
            config=config
        )
        pax_vehicles.to_csv(vehicles_output, index=False)

    # Prepare for emissions rates processing
    vehtypes_with_emfac_id = pd.concat([new_ft_vehicle_types, new_pax_vehicle_types], ignore_index=True)
    vehtypes_with_emfac_id = vehtypes_with_emfac_id.fillna("")

    # Prepare directory for emissions rates files (always start from an empty directory)
    try:
        if os.path.exists(emissions_rates_dir):
            shutil.rmtree(emissions_rates_dir)
        os.makedirs(emissions_rates_dir, exist_ok=True)
        logging.info(f"Ready to write new data to the directory {emissions_rates_dir}")
    except Exception as e:
        logging.error(f"Failed to prepare directory {emissions_rates_dir}: {e}")

    # Process vehicle emissions in parallel with chunking
    chunk_size = 100
    results = []
    for i in range(0, len(vehtypes_with_emfac_id), chunk_size):
        chunk = vehtypes_with_emfac_id.iloc[i:i + chunk_size]
        chunk_results = Parallel(n_jobs=-1, timeout=600)(
            delayed(process_single_vehicle_type)(
                veh_type,
                emissions_rates,
                f"{emissions_rates_dir}/"
            ) for _, veh_type in chunk.iterrows()
        )
        results.extend(chunk_results)
        del chunk_results  # Free memory

    # Update emissions rate file paths in vehicle types
    for result in results:
        # process_single_vehicle_type returns None when it found no rates or failed;
        # such entries must be skipped before unpacking.
        if result is None:
            continue
        veh_type_id, emfac_id = result
        if veh_type_id:
            relative_rates_filepath = f"{rates_relative_dir}/{emfac_id}.csv"
            vehtypes_with_emfac_id.loc[
                vehtypes_with_emfac_id['vehicleTypeId'] == veh_type_id, 'emissionsRatesFile'
            ] = relative_rates_filepath

    # Save updated vehicle types
    logging.info(f"Writing:\n{ft_vehtypes_out_file}\n{pax_vehtypes_out_file}")

    # Helper columns that must not end up in the BEAM vehicle types files.
    # errors='ignore' because files reloaded from a previous run no longer carry them.
    helper_columns = ['emfacId', 'oldVehicleTypeId', 'vehicleClass']

    # Save freight vehicle types
    ft_freight_mask = (vehtypes_with_emfac_id['vehicleCategory'].isin(BeamClasses.get_freight_classes()))
    updated_ft_vehicle_types = vehtypes_with_emfac_id[ft_freight_mask].copy()
    updated_ft_vehicle_types.drop(helper_columns, axis=1, inplace=True, errors='ignore')
    updated_ft_vehicle_types.to_csv(ft_vehtypes_out_file, index=False)

    # Save passenger vehicle types; the untouched types get an empty emissionsRatesFile
    updated_pax_vehicle_types_others = other_pax_vehicle_types.copy()
    updated_pax_vehicle_types_others['emissionsRatesFile'] = ""
    updated_pax_vehicle_types = pd.concat(
        [vehtypes_with_emfac_id[~ft_freight_mask].copy(), updated_pax_vehicle_types_others],
        axis=0
    )
    updated_pax_vehicle_types.drop(helper_columns, axis=1, inplace=True, errors='ignore')
    updated_pax_vehicle_types.to_csv(pax_vehtypes_out_file, index=False)
def process_single_vehicle_type(
        veh_type: Dict[str, Any],
        emissions_rates: pd.DataFrame,
        rates_prefix_filepath: str
) -> Optional[tuple[str, str]]:
    """
    Save the emissions rates that belong to a single vehicle type.

    The rows of ``emissions_rates`` whose ``emfacId`` equals the vehicle
    type's ``emfacId`` are written, unchanged, to
    ``<rates_prefix_filepath><emfacId>.csv``. An existing file is left
    untouched, so vehicle types sharing an EMFAC id share one file.

    Args:
        veh_type (Dict[str, Any]): Mapping-like vehicle type record; must
            provide the 'vehicleTypeId' and 'emfacId' keys
        emissions_rates (pd.DataFrame): Emissions rates with an 'emfacId' column
        rates_prefix_filepath (str): Prefix prepended to '<emfacId>.csv'
            (a directory path ending with a separator, or an empty string
            for the current directory)

    Returns:
        Optional[tuple[str, str]]: ``(vehicleTypeId, emfacId)`` on success,
        None if no emissions data was found or any error occurred (errors
        are logged, never raised).
    """
    try:
        veh_type_id = veh_type['vehicleTypeId']
        emfac_id = veh_type['emfacId']

        # Filter emissions_rates for the current vehicle type
        veh_emissions = emissions_rates[emissions_rates['emfacId'] == emfac_id].copy()

        if veh_emissions.empty:
            logging.warning(f"No emissions data found for vehicle type {veh_type_id}")
            return None

        # Generate the file path
        file_path = f"{rates_prefix_filepath}{emfac_id}.csv"

        # Save the emissions rates to a CSV file only if it doesn't exist
        if not os.path.exists(file_path):
            print(f"Writing emissions data to {file_path}")
            logging.info(f"Writing emissions data to {file_path}")
            # dirname is '' for a bare file name, and os.makedirs('') raises
            target_dir = os.path.dirname(file_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            veh_emissions.to_csv(file_path, index=False)
            print(f"Created new file: {file_path}")
        else:
            print(f"File already generated: {file_path}")

        return veh_type_id, emfac_id

    except KeyError as e:
        logging.error(f"Missing required key in vehicle type data: {e}")
        return None
    except IOError as e:
        logging.error(f"Error writing emissions data file: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error processing vehicle type: {e}")
        return None
def print_unmapped(df, mapped_col, col_to_be_mapped):
    """
    Print the distinct source values that could not be mapped.

    A value of ``col_to_be_mapped`` counts as unmapped when ``mapped_col`` is
    NaN on its row. The values are printed comma separated; a new output line
    is started whenever appending the next value would make the current line
    (including its trailing ", ") longer than 115 characters. Nothing is
    printed when every row is mapped.

    Args:
        df: DataFrame holding both columns.
        mapped_col: Name of the column containing the mapping result.
        col_to_be_mapped: Name of the column containing the source values.
    """
    unmapped_values = df.loc[df[mapped_col].isna(), col_to_be_mapped].unique()
    if len(unmapped_values) == 0:
        return

    max_line_length = 115  # leaves room for the comma and the space
    output_lines = []
    current_line = ""
    for value in unmapped_values:
        # str() so that non-string values (NaN, numbers) do not break the concatenation
        item = str(value)
        if len(current_line + item) > max_line_length:
            output_lines.append(current_line.rstrip(", "))
            current_line = item + ", "
        else:
            current_line += item + ", "
    # Add the last line
    if current_line:
        output_lines.append(current_line.rstrip(", "))

    unmapped_classes_message = f"The following {col_to_be_mapped} were not mapped to {mapped_col}:\n"
    print(unmapped_classes_message + "\n".join(output_lines))
def run():
    """
    Entry point: map the EMFAC population to BEAM vehicle types for one scenario.

    Fills the run-specific file locations into the study area configuration,
    builds the EMFAC -> BEAM class mapping, stores a JSON snapshot of the
    final configuration in the emissions output directory, and then runs the
    data preparation and the emissions-rates assignment.
    """
    # Configuration parameters
    area = "sfbay"
    run_batch = "20240123"
    scenario = "2018-Baseline"
    scenario_label = scenario.replace("_", "-")

    study_area_config = get_area_config(area)
    work_dir = study_area_config["work_dir"]
    config = study_area_config["emissions"][scenario]
    config["rates"]["output_dir"] = f"emissions/{run_batch}"
    beam_config = config["beam"]
    beam_config["carriers_file"] = f"beam-ft/{run_batch}/{scenario}/carriers--{scenario_label}.csv"
    beam_config["payloads_file"] = f"beam-ft/{run_batch}/{scenario}/payloads--{scenario_label}.csv"
    beam_config["ft_vehicle_types_file"] = f"vehicle-tech/vehicleTypes--frism--{scenario_label}.csv"
    beam_config["pax_vehicle_types_file"] = "vehicle-tech/vehicleTypes--atlas--2023-Baseline.csv"
    emfac_class_map = generate_emfac_beam_class_mapping(
        area, scenario, work_dir, config, to_filter_out=[BeamClasses.CLASS_2B3_VOCATIONAL]
    )
    config["mapping"]["class"]["emfac"] = emfac_class_map

    # Write the config file to keep track of runs.
    # Write it only after all modifications to config are completed.
    # emissions_work_dir already contains work_dir, so it must not be joined to it again.
    emissions_work_dir = os.path.join(work_dir, config["rates"]["output_dir"])
    os.makedirs(emissions_work_dir, exist_ok=True)
    config_snapshot_file = os.path.join(emissions_work_dir, f"{area}_emissions_config_{scenario}.json")
    with open(config_snapshot_file, 'w') as f:
        json.dump(study_area_config, f, indent=2)

    # #################################################################

    print(f"\n{'='*50}")
    print(f"  EMISSIONS PROCESSING - {area.upper()} REGION")
    print(f"  Run Batch: {run_batch}")
    print(f"  Scenario: {scenario}")
    print(f"{'='*50}\n")

    emfac_fleet, emfac_rates = prepare_emissions_data_for_mapping(area, scenario, work_dir, config)
    assign_emission_rates_to_vehicle_types(scenario, emfac_rates, emfac_fleet, work_dir, config)
    print(" DONE")

    # #################################################################
def read_skims_emissions_chunked(skims_file, vehicleTypes_file, vehicleTypeId_filter, network, expansion_factor,
                                 scenario_name, chunk_size=1000000):
    """
    Read and process emissions data from skims file in chunks

    For every record batch of the skims CSV, rows whose ``vehicleTypeId``
    contains ``vehicleTypeId_filter`` are kept, pollutant values are divided
    by 1e6 and multiplied by the expanded observations, energy is converted
    with a 3.6e6 divisor (``kwh``), travel time with a 3.6e3 divisor
    (``vht``), and the batch is joined with the vehicle types and the
    network before being melted to one row per pollutant.

    Args:
        skims_file: Path to skims emissions CSV file
        vehicleTypes_file: Path to vehicle types CSV file
        vehicleTypeId_filter: Substring that vehicle type IDs must contain
        network: Network dataframe with ``linkId`` and ``linkLength``
        expansion_factor: Factor to scale observations
        scenario_name: Name of the scenario (stored in the ``scenario`` column)
        chunk_size: Block size handed to the Arrow CSV reader

    Returns:
        DataFrame with processed emissions data (long format, one row per
        record and pollutant)
    """
    start_time = time.time()

    # Process vehicleTypes file
    vehicleTypes = pd.read_csv(vehicleTypes_file)
    vehicleTypes['emfacFuel'] = vehicleTypes['emfacId'].str.split('-').str[-1]
    vehicleTypes['class'] = vehicleTypes['vehicleCategory'].str.replace('Vocational|Tractor', '',
                                                                        regex=True).str.strip()
    # Electric primary fuel combined with any secondary fuel is labelled 'Phe'
    vehicleTypes['beamFuel'] = np.where(
        (vehicleTypes['primaryFuelType'].str.lower() == fuel_emfac2beam_map["Elec"]) & vehicleTypes[
            'secondaryFuelType'].notna(),
        'Phe',
        vehicleTypes['primaryFuelType'].str.lower().map(fuel_beam2emfac_map)
    )
    vehicle_type_columns = vehicleTypes[['vehicleTypeId', 'class', 'beamFuel', 'emfacFuel', 'emfacId']]
    network_columns = network[['linkId', 'linkLength']]

    pollutants = list(pollutant_columns.keys())
    scaled_names = [f'scaled_{pollutant}' for pollutant in pollutants]
    id_vars = ['hour', 'linkId', 'tazId', 'emfacId', 'class', 'beamFuel', 'emfacFuel', 'process', 'kwh', 'vmt',
               'vht']

    def scale(column, divisor, weights):
        # (column / divisor) * weights, computed in Arrow
        return pc.multiply(pc.divide(column, pc.cast(pa.scalar(divisor), pa.float64())), weights)

    # Initialize an empty list to store the processed chunks
    result_chunks = []

    # Set up the CSV reader with chunking
    csv_reader = pv.open_csv(
        skims_file,
        read_options=pv.ReadOptions(block_size=chunk_size, use_threads=True),
        parse_options=pv.ParseOptions(delimiter=','),
        convert_options=pv.ConvertOptions(column_types=SKIMS_SCHEMA)
    )

    # Get total file size for progress bar
    total_size = os.path.getsize(skims_file)

    # The context manager closes the progress bar even if a chunk fails
    with tqdm(total=total_size, unit='B', unit_scale=True, desc="Processing chunks",
              position=0, leave=True, mininterval=1.0, maxinterval=10.0, miniters=1) as pbar:
        # Process the skims file in chunks
        for chunk in csv_reader:
            # Kept in its own variable so the chunk_size argument is not overwritten
            chunk_bytes = chunk.nbytes

            # Filter the chunk
            mask = pc.match_substring(chunk['vehicleTypeId'], pattern=vehicleTypeId_filter)
            filtered_chunk = chunk.filter(mask)

            # Perform calculations in PyArrow
            observations_expansion = pc.multiply(
                filtered_chunk['observations'], pc.cast(pa.scalar(expansion_factor), pa.float64())
            )

            new_fields = [pa.field(name, pa.float64(), True) for name in scaled_names]
            new_columns = [scale(filtered_chunk[pollutant], 1e6, observations_expansion)
                           for pollutant in pollutants]

            new_fields.append(pa.field('kwh', pa.float64(), True))
            new_columns.append(scale(filtered_chunk['energyInJoule'], 3.6e6, observations_expansion))

            new_fields.append(pa.field('vht', pa.float64(), True))
            new_columns.append(scale(filtered_chunk['travelTimeInSecond'], 3.6e3, observations_expansion))

            # Create a new RecordBatch with additional columns
            new_schema = filtered_chunk.schema
            for field in new_fields:
                new_schema = new_schema.append(field)
            filtered_chunk = pa.RecordBatch.from_arrays(filtered_chunk.columns + new_columns, schema=new_schema)

            # Convert to pandas
            df_chunk = filtered_chunk.to_pandas()

            # Merge with vehicleTypes and network
            df_chunk_merged = (
                df_chunk
                .merge(vehicle_type_columns, on='vehicleTypeId', how='left')
                .merge(network_columns, on='linkId', how='left')
            )

            # VMT = link length * 6.21371192e-4 * expanded observations. The expansion is
            # recomputed from the merged frame's own 'observations' column so that it
            # stays aligned with the rows even if a merge changes their number.
            df_chunk_merged['vmt'] = (df_chunk_merged['linkLength'] * 6.21371192e-4) * (
                df_chunk_merged['observations'] * float(expansion_factor)
            )

            # Rename column
            df_chunk_merged.rename(columns={'emissionsProcess': 'process'}, inplace=True)

            # Melt the dataframe: one row per (record, pollutant)
            melted_chunk = df_chunk_merged.melt(
                id_vars=id_vars,
                value_vars=scaled_names,
                var_name='pollutant',
                value_name='rate'
            )
            melted_chunk['pollutant'] = melted_chunk['pollutant'].str.replace('scaled_', '')
            melted_chunk['scenario'] = scenario_name

            result_chunks.append(melted_chunk)

            # Update progress bar
            pbar.update(chunk_bytes)

    # Combine all processed chunks
    melted = pd.concat(result_chunks, ignore_index=True)

    end_time = time.time()
    print(f"Time taken to read the file: {end_time - start_time:.2f} seconds to read file {skims_file}")

    return melted
def create_model_vmt_comparison_chart(emfac_vmt_file, emfac_area, emfac_scenario, skims_data, famos_scenario,
                                      output_dir):
    """
    Build the EMFAC vs FAMOS VMT table by fuel and vehicle class

    EMFAC rows are restricted to the requested calendar year, to sub-areas
    containing "(<CARB area>)" for ``emfac_area``, and to vehicle classes
    present in the freight class mapping. Both sources are summed per
    "<fuel>-<class>" key, stacked with a ``model`` column ('emfac' /
    'famos'), and written to ``emfac_famos_vmt_by_fuel_class.csv`` in
    ``output_dir``.

    Args:
        emfac_vmt_file: Path to EMFAC VMT CSV file
        emfac_area: Area name for EMFAC data (key of ``region_to_carb_area``)
        emfac_scenario: Calendar year selected from the EMFAC data
        skims_data: Processed skims data
        famos_scenario: Name of the FAMOS scenario
        output_dir: Directory to save output

    Returns:
        DataFrame with combined EMFAC and FAMOS VMT data
    """
    df = pd.read_csv(emfac_vmt_file)
    _, ft_emfac_class_map = create_vehicle_class_mapping(df["vehicle_class"].unique())

    # The mapped class is a string (or NaN), so it has to be turned into an explicit
    # boolean before it can be combined with the other row filters.
    has_freight_class = df['vehicle_class'].map(ft_emfac_class_map).notna()
    in_area = df['sub_area'].str.contains(fr'\({region_to_carb_area[emfac_area]}\)', na=False)
    in_year = df['calendar_year'] == emfac_scenario
    filtered_df = df[in_year & in_area & has_freight_class].copy()

    filtered_df["class"] = filtered_df['vehicle_class'].map(ft_emfac_class_map).map(
        {
            'Class 4-6 Vocational': 'Class456',
            'Class 7&8 Vocational': 'Class78',
            'Class 7&8 Tractor': 'Class78'
        }
    )
    filtered_df["fuel_class"] = filtered_df["fuel"] + "-" + filtered_df["class"]
    emfac_vmt = filtered_df.groupby(["fuel_class"])["total_vmt"].sum().reset_index()
    emfac_vmt.rename(columns={'total_vmt': 'mvmt'}, inplace=True)
    emfac_vmt["model"] = "emfac"

    famos_vmt = skims_data[skims_data["scenario"] == famos_scenario].groupby(
        ["class", "beamFuel"]
    )["vmt"].sum().reset_index()
    famos_vmt["fuel_class"] = famos_vmt["beamFuel"] + "-" + famos_vmt["class"]
    famos_vmt = famos_vmt[["fuel_class", "vmt"]].copy()
    famos_vmt.rename(columns={'vmt': 'mvmt'}, inplace=True)
    famos_vmt["model"] = "famos"

    emfac_famos_vmt = pd.concat([emfac_vmt, famos_vmt], axis=0)
    emfac_famos_vmt.to_csv(f"{output_dir}/emfac_famos_vmt_by_fuel_class.csv")
    return emfac_famos_vmt
_, ft_emfac_class_map = create_vehicle_class_mapping(df["vehicle_class"].unique()) + filtered_df = df[ + (df['calendar_year'] == emfac_scenario) & + (df['sub_area'].str.contains(fr'\({region_to_carb_area[emfac_area]}\)')) & + (df['vehicle_class'].map(ft_emfac_class_map))].copy() + filtered_df["class"] = df['vehicle_class'].map(ft_emfac_class_map).map( + { + 'Class 4-6 Vocational': 'Class456', + 'Class 7&8 Vocational': 'Class78', + 'Class 7&8 Tractor': 'Class78' + } + ) + filtered_df["fuel_class"] = filtered_df["fuel"] + "-" + filtered_df["class"] + emfac_vmt = filtered_df.groupby(["fuel_class"])["total_vmt"].sum().reset_index() + emfac_vmt.rename(columns={'total_vmt': 'mvmt'}, inplace=True) + emfac_vmt["model"] = "emfac" + famos_vmt = skims_data[skims_data["scenario"] == famos_scenario].groupby( + ["class", "beamFuel"] + )["vmt"].sum().reset_index() + famos_vmt["fuel_class"] = famos_vmt["beamFuel"] + "-" + famos_vmt["class"] + famos_vmt = famos_vmt[["fuel_class", "vmt"]].copy() + famos_vmt.rename(columns={'vmt': 'mvmt'}, inplace=True) + famos_vmt["model"] = "famos" + emfac_famos_vmt = pd.concat([emfac_vmt, famos_vmt], axis=0) + emfac_famos_vmt.to_csv(f"{output_dir}/emfac_famos_vmt_by_fuel_class.csv") + return emfac_famos_vmt + + +def load_network(network_file, source_epsg): + """ + Load and transform network data + + Args: + network_file: Path to network CSV file + source_epsg: Source EPSG code for coordinate transformation + + Returns: + DataFrame with network data + """ + # Read and process network file + network = pd.read_csv(network_file) + transformer = Transformer.from_crs(source_epsg, "EPSG:4326", always_xy=True) + + # Vectorized coordinate conversion + network[['fromLocationX', 'fromLocationY']] = network.apply( + lambda row: pd.Series(transformer.transform(row['fromLocationX'], row['fromLocationY'])), + axis=1, result_type='expand' + ) + network[['toLocationX', 'toLocationY']] = network.apply( + lambda row: 
def generate_h3_intersections(network_df, resolution, output_dir):
    """
    Compute, for every network link, the share of its length that falls in each H3 cell.

    The H3 cells are generated over the bounding box of all link end points. Each link is
    a straight segment between its from/to coordinates. The result is written to
    ``{output_dir}/network.h3.csv`` and returned.

    Args:
        network_df: Network dataframe with ``linkId`` and the four coordinate columns
            ``fromLocationX``/``fromLocationY``/``toLocationX``/``toLocationY``
            (used as longitude/latitude in EPSG:4326)
        resolution: H3 resolution
        output_dir: Directory to save output

    Returns:
        DataFrame with columns ``h3_cell``, ``linkId`` and ``length_ratio``, where
        ``length_ratio`` is the fraction (0..1) of the link's geometry inside the cell.
        An empty DataFrame is returned when there is nothing to intersect.
    """
    print(f"Initial network_df shape: {network_df.shape}")

    # Remove rows with NaN values in coordinate columns
    coord_columns = ['fromLocationX', 'fromLocationY', 'toLocationX', 'toLocationY']
    network_clean = network_df.dropna(subset=coord_columns)
    print(f"Clean network_df shape: {network_clean.shape}")

    if network_clean.empty:
        # min()/max() below would fail on an empty sequence.
        print("No network links with valid coordinates.")
        return pd.DataFrame()

    # Bounding box of all link end points.
    lats = network_clean[['fromLocationY', 'toLocationY']].values.flatten()
    lons = network_clean[['fromLocationX', 'toLocationX']].values.flatten()
    min_lat, max_lat = min(lats), max(lats)
    min_lon, max_lon = min(lons), max(lons)
    # Pairs are (lat, lon): polyfill is called without geo_json_conformant=True.
    bbox = [[
        [min_lat, min_lon],
        [min_lat, max_lon],
        [max_lat, max_lon],
        [max_lat, min_lon],
        [min_lat, min_lon]  # Close the polygon
    ]]

    # Generate H3 cells
    h3_cells = list(h3.polyfill({'type': 'Polygon', 'coordinates': bbox}, resolution))
    print(f"Number of H3 cells: {len(h3_cells)}")

    if len(h3_cells) == 0:
        print("No H3 cells created. Check your bounding box and resolution.")
        return pd.DataFrame()

    # Create GeoDataFrame of H3 cells
    h3_gdf = gpd.GeoDataFrame(
        {'h3_cell': h3_cells},
        geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3_cells],
        crs="EPSG:4326"
    )

    # Create network GeoDataFrame
    def create_linestring(row):
        return LineString([(row['fromLocationX'], row['fromLocationY']),
                           (row['toLocationX'], row['toLocationY'])])

    network_gdf = gpd.GeoDataFrame(
        network_clean,
        geometry=network_clean.apply(create_linestring, axis=1),
        crs="EPSG:4326"
    )

    # Spatial join with the links on the LEFT: sjoin keeps only the left geometry, and the
    # link line is the geometry needed below (the cell polygon is looked up by id).
    joined = gpd.sjoin(network_gdf, h3_gdf, how="inner", predicate="intersects")
    print(f"Joined DataFrame shape after spatial join: {joined.shape}")

    if joined.empty:
        print("No intersections found between H3 cells and network geometries.")
        return pd.DataFrame()

    cell_polygons = dict(zip(h3_gdf['h3_cell'], h3_gdf.geometry))

    def calculate_length_ratio(row):
        """Fraction of the link's own geometry length that lies inside the cell."""
        try:
            line = row['geometry']
            if line.length == 0:
                # Degenerate link (identical end points): no measurable share.
                return 0.0
            clipped = cell_polygons[row['h3_cell']].intersection(line)
            # Both lengths are in the same (degree) units, so the ratio is unit-free.
            return clipped.length / line.length
        except Exception as e:
            # Best effort: a bad geometry must not abort the whole run.
            print(f"Error in calculate_intersection: {e}")
            return 0.0

    tqdm.pandas(desc="Calculating intersections")
    joined['length_ratio'] = joined.progress_apply(calculate_length_ratio, axis=1)

    # Keep only necessary columns
    intersection_df = joined[['h3_cell', 'linkId', 'length_ratio']]

    intersection_df.to_csv(f'{output_dir}/network.h3.csv', index=False)

    return intersection_df
def process_h3_emissions(emissions_df, intersection_df, pollutant):
    """
    Aggregate the emissions of one pollutant from links onto H3 cells.

    Each link's ``rate`` is multiplied by the ``length_ratio`` of every cell it
    intersects, and the products are summed per scenario and cell.

    Args:
        emissions_df: Emissions dataframe with ``scenario``, ``linkId``, ``pollutant``
            and ``rate`` columns. It is not modified.
        intersection_df: H3 intersection dataframe with ``h3_cell``, ``linkId`` and
            ``length_ratio`` columns
        pollutant: Pollutant name; also the name of the value column in the result

    Returns:
        DataFrame with columns ``scenario``, ``h3_cell`` and ``<pollutant>``
    """
    print(f"Initial emissions_df shape: {emissions_df.shape}")

    # Take an explicit copy of the rows for this pollutant, so that the numeric
    # conversion below never writes into a view of the caller's frame.
    is_pollutant = emissions_df['pollutant'] == pollutant
    filtered_emissions = emissions_df.loc[is_pollutant, ['scenario', 'linkId', 'rate']].copy()
    # Non-numeric rates become NaN and are dropped together with any other missing value.
    filtered_emissions['rate'] = pd.to_numeric(filtered_emissions['rate'], errors='coerce')
    filtered_emissions = filtered_emissions.dropna()
    print(f"Filtered emissions shape: {filtered_emissions.shape}")

    # Merge with intersection data
    merged = pd.merge(intersection_df, filtered_emissions, on='linkId', how='inner')
    print(f"Merged DataFrame shape: {merged.shape}")

    # Weight each link's rate by its intersection ratio with the cell
    merged[f'{pollutant}'] = merged['rate'] * merged['length_ratio']

    # Group by H3 cell and sum the weighted emissions
    result = merged.groupby(['scenario', 'h3_cell'])[f'{pollutant}'].sum().reset_index()
    print(f"Final result shape: {result.shape}")
    return result
calculate_delta_emissions(emissions_df, pollutant, scenario1, scenario2): + """ + Calculate delta emissions between two scenarios + + Args: + emissions_df: Emissions dataframe + pollutant: Pollutant name + scenario1: First scenario name + scenario2: Second scenario name + + Returns: + DataFrame with delta emissions data + """ + pivot_df = emissions_df.pivot(index='h3_cell', columns='scenario', values=pollutant).reset_index() + pivot_df = pivot_df.fillna(0) + pivot_df["scenario"] = f"{scenario1}-{scenario2}" + pivot_df[f'Delta_{pollutant}'] = pivot_df[scenario1] - pivot_df[scenario2] + return pivot_df + + +# ################ +# ##### Main ##### +# ################ + +if __name__ == "__main__": + # Input parameters + area = "sfbay" + batch = "2024-01-23" + mode_to_filter = "-TRUCK-" + expansion_factor = 1 / 0.1 + source_epsg = "EPSG:26910" + selected_pollutants = ['PM2_5', 'NOx', 'CO', 'ROG', 'CO2', 'HC'] + h3_resolution = 8 + + # File paths + emfac_vmt_file = os.path.expanduser( + f"~/Workspace/Models/emfac/Default_Statewide_2018_2025_2030_2040_2050_Annual_vmt_20240612233346.csv") + run_dir = os.path.expanduser(f"~/Workspace/Simulation/{area}/beam-runs/{batch}") + scenario_2018 = "2018_Baseline" + scenario_2050 = "2050_Refhighp6" + skims_2018_file = f"{run_dir}/{scenario_2018}/0.skimsEmissions.csv.gz" + skims_2050_file = f"{run_dir}/{scenario_2050}/0.skimsEmissions.csv.gz" + network_file = f"{run_dir}/network.csv.gz" + plan_dir = os.path.expanduser(f"~/Workspace/Simulation/{area}/beam-freight/{batch}") + types_2018_file = f"{plan_dir}/vehicle-tech/ft-vehicletypes--{scenario_2018.replace('_', '-')}-TrAP.csv" + types_2050_file = f"{plan_dir}/vehicle-tech/ft-vehicletypes--{scenario_2050.replace('_', '-')}-TrAP.csv" + tours_2018_file = f"{plan_dir}/{scenario_2018}/tours--{scenario_2018.replace('_', '-')}.csv" + tours_2050_file = f"{plan_dir}/{scenario_2050}/tours--{scenario_2050.replace('_', '-')}.csv" + carriers_2018_file = 
f"{plan_dir}/{scenario_2018}/carriers--{scenario_2018.replace('_', '-')}-TrAP.csv" + carriers_2050_file = f"{plan_dir}/{scenario_2050}/carriers--{scenario_2050.replace('_', '-')}-TrAP.csv" + + # Output directories + plot_dir = f'{run_dir}/_plots' + Path(plot_dir).mkdir(parents=True, exist_ok=True) + + # Create scenario labels + scenario_2018_label = scenario_2018.replace("_", " ") + scenario_2050_label = scenario_2050.replace("_", " ").replace("HOPhighp2", "HAVF") + + print("Loading network data...") + network = load_network(network_file, source_epsg) + network_h3_intersection = generate_h3_intersections(network, h3_resolution, run_dir) + + print("Processing skims data...") + skims_2018 = read_skims_emissions_chunked( + skims_2018_file, + types_2018_file, + mode_to_filter, + network, + expansion_factor, + scenario_2018_label + ) + + skims_2050 = read_skims_emissions_chunked( + skims_2050_file, + types_2050_file, + mode_to_filter, + network, + expansion_factor, + scenario_2050_label + ) + + skims = pd.concat([skims_2018, skims_2050]) + print(f"Read {len(skims)} rows of skims") + + print("Processing FAMOS tours data...") + # Load tours data + tours_2018 = pd.read_csv(tours_2018_file)[["tourId", 'departureTimeInSec']] + tours_2050 = pd.read_csv(tours_2050_file)[["tourId", 'departureTimeInSec']] + carriers_2018 = pd.read_csv(carriers_2018_file)[["tourId", 'vehicleTypeId']] + carriers_2050 = pd.read_csv(carriers_2050_file)[["tourId", 'vehicleTypeId']] + types_2018 = pd.read_csv(types_2018_file)[ + ["vehicleTypeId", 'vehicleCategory', 'primaryFuelType', 'secondaryFuelType']] + types_2050 = pd.read_csv(types_2050_file)[ + ["vehicleTypeId", 'vehicleCategory', 'primaryFuelType', 'secondaryFuelType']] + + # Process and merge tours data + tours_types_2018 = pd.merge(tours_2018, pd.merge(carriers_2018, types_2018, on="vehicleTypeId"), on="tourId") + tours_types_2018["scenario"] = scenario_2018_label + tours_types_2050 = pd.merge(tours_2050, pd.merge(carriers_2050, types_2050, 
on="vehicleTypeId"), on="tourId") + tours_types_2050["scenario"] = scenario_2050_label + famos_tours = pd.concat([tours_types_2018, tours_types_2050]) + + print("Calculating VMT...") + famos_vmt = skims.groupby(['scenario', 'hour', 'beamFuel', 'class'])['vmt'].sum().reset_index().copy() + + print("Creating VMT comparison with EMFAC...") + emfac_famos_vmt = create_model_vmt_comparison_chart( + emfac_vmt_file, area, 2050, skims, scenario_2050_label, plot_dir + ) + + print("Processing activities...") + driving_process_activity = skims[ + (skims["process"].isin(["RUNEX", "PMBW", "PMTW", "RUNLOSS"])) & + (skims["vht"] > 0) + ].groupby(["scenario", "linkId"])["vmt"].sum().reset_index(name="vmt") + + h3_vmt = process_h3_data(network_h3_intersection, driving_process_activity, "vmt") + vmt_column = "Weighted VMT from driving activities" + h3_vmt.rename(columns={"weighted_vmt": vmt_column}, inplace=True) + + parking_process_activity = skims[ + (skims["process"].isin(["STREX", "DIURN", "HOTSOAK", "RUNLOSS", "IDLEX"])) & + (skims["vht"] == 0) + ].groupby(["scenario", "linkId"]).size().reset_index(name='count') + + h3_count = process_h3_data(network_h3_intersection, parking_process_activity, "count") + count_column = "Weighted count of parking activities" + h3_count.rename(columns={"weighted_count": count_column}, inplace=True) + + print("Processing emissions...") + # Process each pollutant + pm25 = process_h3_emissions(skims, network_h3_intersection, 'PM2_5') + nox = process_h3_emissions(skims, network_h3_intersection, 'NOx') + co = process_h3_emissions(skims, network_h3_intersection, 'CO') + co2 = process_h3_emissions(skims, network_h3_intersection, 'CO2') + + # Convert to grams per square meter + pm25_column = "PM2_5 in grams per square meter" + pm25[pm25_column] = pm25["PM2_5"] * 1e6 # from metric ton to gram + + nox_column = "NOx in grams per square meter" + nox[nox_column] = nox["NOx"] * 1e6 + + co_column = "CO in grams per square meter" + co[co_column] = co["CO"] * 1e6 + 
+ co2_column = "CO2 in grams per square meter" + co2[co2_column] = co2["CO2"] * 1e6 + + print("Calculating delta emissions...") + # Calculate delta emissions between scenarios + # PM2.5 delta + pm25_delta = pm25.pivot(index='h3_cell', columns='scenario', values='PM2_5').reset_index() + pm25_delta = pm25_delta.fillna(0) + pm25_delta["scenario"] = "-".join([scenario_2050_label, scenario_2018_label]) + pm25_delta['Delta_PM2_5'] = pm25_delta[scenario_2050_label] - pm25_delta[scenario_2018_label] + pm25_delta_column = "Delta PM2_5 in grams per square meter" + pm25_delta[pm25_delta_column] = pm25_delta["Delta_PM2_5"] * 1e6 + + # NOx delta + nox_delta = nox.pivot(index='h3_cell', columns='scenario', values='NOx').reset_index() + nox_delta = nox_delta.fillna(0) + nox_delta["scenario"] = "-".join([scenario_2050_label, scenario_2018_label]) + nox_delta['Delta_NOx'] = nox_delta[scenario_2050_label] - nox_delta[scenario_2018_label] + nox_delta_column = "Delta NOx in grams per square meter" + nox_delta[nox_delta_column] = nox_delta["Delta_NOx"] * 1e6 + + # CO2 delta + co2_delta = co2.pivot(index='h3_cell', columns='scenario', values='CO2').reset_index() + co2_delta = co2_delta.fillna(0) + co2_delta["scenario"] = "-".join([scenario_2050_label, scenario_2018_label]) + co2_delta['Delta_CO2'] = co2_delta[scenario_2050_label] - co2_delta[scenario_2018_label] + co2_delta_column = "Delta CO2 in grams per square meter" + co2_delta[co2_delta_column] = co2_delta["Delta_CO2"] * 1e6 + + print("Generating plots...") + # Figure 1: Activity plots + plot_hourly_activity(famos_tours, fuel_beam2emfac_map, plot_dir, height_size=6) + plot_hourly_vmt(famos_vmt, plot_dir, height_size=6) + + # Figure 2: VMT comparison + plot_multi_pie_emfac_famos_vmt(emfac_famos_vmt, plot_dir) + + # Figure 3: Activity heatmaps + plot_h3_heatmap(h3_vmt, vmt_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, + in_log_scale=True) + plot_h3_heatmap(h3_count, count_column, scenario_2018_label, 
plot_dir, is_delta=False, remove_outliers=True, + in_log_scale=True) + + # Figure 4: Emissions heatmaps + plot_h3_heatmap(pm25, pm25_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, + in_log_scale=True) + plot_h3_heatmap(nox, nox_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, + in_log_scale=True) + plot_h3_heatmap(co2, co2_column, scenario_2018_label, plot_dir, is_delta=False, remove_outliers=True, + in_log_scale=True) + + # Figure 5: Hourly emissions + plot_hourly_emissions_by_scenario_class_fuel(skims, 'PM2_5', plot_dir, plot_legend=True, height_size=6, + font_size=24) + plot_hourly_emissions_by_scenario_class_fuel(skims, 'NOx', plot_dir, plot_legend=True, height_size=6, font_size=24) + plot_hourly_emissions_by_scenario_class_fuel(skims, 'CO2', plot_dir, plot_legend=True, height_size=6, font_size=24) + + # Figure 6: Delta emissions heatmaps + plot_h3_heatmap(pm25_delta, pm25_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, + is_delta=True, remove_outliers=True, in_log_scale=True) + plot_h3_heatmap(nox_delta, nox_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, + is_delta=True, remove_outliers=True, in_log_scale=True) + plot_h3_heatmap(co2_delta, co2_delta_column, "-".join([scenario_2050_label, scenario_2018_label]), plot_dir, + is_delta=True, remove_outliers=True, in_log_scale=True) + + # Figure 7: Pollution variability + plot_pollution_variability_by_process_vehicle_types(skims, "PM2_5", scenario_2018_label, plot_dir, height_size=6, + font_size=24) + plot_pollution_variability_by_process_vehicle_types(skims, "NOx", scenario_2018_label, plot_dir, height_size=6, + font_size=24) + plot_pollution_variability_by_process_vehicle_types(skims, "CO2", scenario_2018_label, plot_dir, height_size=6, + font_size=24) + + plot_pollutants_by_process(skims, scenario_2018_label, plot_dir, height_size=6, font_size=24) + plot_pollutants_by_process(skims, 
scenario_2050_label, plot_dir, height_size=6, font_size=24) + + print("Processing completed successfully.") \ No newline at end of file diff --git a/src/main/python/emissions/todo_distribute_vehicle_types.py b/src/main/python/emissions/todo_distribute_vehicle_types.py new file mode 100644 index 00000000000..afa3aa5915a --- /dev/null +++ b/src/main/python/emissions/todo_distribute_vehicle_types.py @@ -0,0 +1,57 @@ +import math +import os + +import pandas as pd + + +def unpacking_ft_vehicle_population_mesozones(carriers, mesozones_to_county_file, mesozones_lookup_file): + import pygris + # ### Mapping counties with Mesozones ### + if not os.path.exists(mesozones_to_county_file): + county_data = pygris.counties(state='06', year=2018, cb=True, cache=True) + cbg_data = pygris.block_groups(state='06', year=2018, cb=True, cache=True) + county_data_clipped = county_data[['COUNTYFP', 'NAME']] + cbg_data_clipped = cbg_data[['GEOID', 'COUNTYFP']] + cbg_to_county = pd.merge(cbg_data_clipped, county_data_clipped, on="COUNTYFP", how='left') + mesozones_lookup = pd.read_csv(mesozones_lookup_file, dtype=str) + mesozones_lookup_clipped = mesozones_lookup[['MESOZONE', 'GEOID']] + mesozones_to_county = pd.merge(mesozones_lookup_clipped, cbg_to_county, on='GEOID', how='left') + mesozones_to_county.to_csv(mesozones_to_county_file, index=False) + else: + mesozones_to_county = pd.read_csv(mesozones_to_county_file, dtype=str) + + # TODO For future improvement find a way to map outside study area mesozones. 
def calculate_truck_ownership_probability(income):
    """
    Logistic estimate of the probability that a household owns a truck.

    :param income: Household income in thousands of dollars per year
    :return: Probability of truck ownership (0 to 1)
    """
    steepness = 0.1  # slope of the logistic curve
    midpoint_income = 80  # income at which the probability is exactly 0.5

    # Standard logistic function: 1 / (1 + e^(-k * (x - x0)))
    exponent = -steepness * (income - midpoint_income)
    return 1 / (1 + math.exp(exponent))
def get_base_duration(df, weight_dict):
    """
    Calculate base duration for each vehicle class.

    The base duration of a class is the smallest mean ``operationDurationInMin`` over all
    (activity type, pickup weight bin, drop-off weight bin) combinations observed for
    that class, rounded to the nearest whole minute and expressed in seconds.

    Args:
        df: DataFrame with survey data. The columns read are ``vehicleClass``,
            ``activityType``, ``cargoWeightPU``, ``cargoWeightDO`` and
            ``operationDurationInMin``. It is not modified.
        weight_dict: Dictionary mapping (lower, upper) weight bounds to bin labels

    Returns:
        Dictionary mapping vehicle classes to base durations (in seconds)

    Raises:
        ValueError: if a vehicle class in ``vehicle_classes`` has no usable survey
            records, so no base duration can be derived for it.
    """
    # Create a copy to avoid modifying the original DataFrame
    df_copy = df.copy()

    # Create bins for cargo weights based on the weight_dict
    df_copy['cargoWeightPU_bin'] = df_copy['cargoWeightPU'].apply(lambda x: get_weight_bin(x, weight_dict))
    df_copy['cargoWeightDO_bin'] = df_copy['cargoWeightDO'].apply(lambda x: get_weight_bin(x, weight_dict))

    # Group by vehicle class, activity type, and weight bins
    groupby_columns = ['vehicleClass', 'activityType', 'cargoWeightPU_bin', 'cargoWeightDO_bin']

    # Group and calculate average operation duration
    grouped_data = df_copy.groupby(groupby_columns)['operationDurationInMin'].agg(['mean', 'count']).reset_index()

    print("\nGrouped data summary:")
    print(f"Total groups: {len(grouped_data)}")

    min_durations_in_sec = {}
    for vehicle_class in vehicle_classes:
        class_data = grouped_data[grouped_data['vehicleClass'] == vehicle_class]

        # Without any mean for this class the minimum would be NaN and the rounding
        # below would fail with an unhelpful message; fail with an explicit one instead.
        if class_data['mean'].dropna().empty:
            raise ValueError(
                f"No survey records with an operation duration for vehicle class "
                f"{vehicle_class!r}; cannot derive a base duration."
            )

        # idxmin returns the first row holding the minimum mean.
        min_row = class_data.loc[class_data['mean'].idxmin()]
        min_duration = min_row['mean']
        print(f"{vehicle_class}: Minimum average duration = {min_duration:.2f} minutes")

        # Show the specific combination that resulted in the minimum
        print(f" Combination: {min_row[groupby_columns].to_dict()}")
        print(f" Count: {min_row['count']} records")

        # Round to the nearest whole minute, then convert to seconds.
        min_durations_in_sec[vehicle_class] = int(round(min_duration)) * 60

    return min_durations_in_sec
def _weight_bin_label(lower, upper, is_last):
    """Return the default label of one bin: whole pounds below 1000, whole thousands above."""
    if is_last:
        return f"{int(lower)}+lb"
    if upper < 1000:
        return f"{int(lower)}-{int(upper)}lb"
    return f"{int(lower / 1000)}k-{int(upper / 1000)}klb"


def extract_weight_bins(df, operation_dict, num_bins=7):
    """
    Extract weight bins from the data dynamically.

    Loading rows contribute ``cargoWeightPU`` and unloading rows contribute
    ``cargoWeightDO``. Weights outside ``[max(0, Q1 - 1.5*IQR), Q3 + 1.5*IQR]`` are
    ignored. Bin edges are evenly spaced percentiles of the rest, rounded to the
    nearest 10. The first bin starts at 0 and the last bin is open-ended.

    Args:
        df: DataFrame with ``activityType``, ``cargoWeightPU`` and ``cargoWeightDO`` columns
        operation_dict: Dictionary whose ``"loading"`` and ``"unloading"`` entries hold the
            ``activityType`` values of the two operations
        num_bins: Number of percentile bins to create (duplicates after rounding are merged)

    Returns:
        Dictionary mapping (lower, upper) bounds to bin labels. Every label is unique:
        when two bins would get the same thousands-truncated label (for example
        ``1k-1klb``), those bins are labelled with their exact bounds instead
        (for example ``1000-1200lb``).

    Raises:
        ValueError: if no usable loading/unloading weight is available.
    """
    # Collect the relevant weight of every loading / unloading record (vectorized).
    activity = df['activityType']
    is_loading = activity == operation_dict["loading"]
    # A row counts once even if both codes are equal, as in an if/elif chain.
    is_unloading = (activity == operation_dict["unloading"]) & ~is_loading
    weights = np.concatenate([
        df.loc[is_loading, 'cargoWeightPU'].to_numpy(dtype=float),
        df.loc[is_unloading, 'cargoWeightDO'].to_numpy(dtype=float),
    ])
    # A single NaN would turn every percentile into NaN.
    weights = weights[~np.isnan(weights)]
    if weights.size == 0:
        raise ValueError("No loading/unloading cargo weights available to derive weight bins.")

    # Remove extreme outliers to prevent skewing the bin boundaries
    q1, q3 = np.percentile(weights, [25, 75])
    iqr = q3 - q1
    lower_bound = max(0, q1 - 1.5 * iqr)
    upper_bound = q3 + 1.5 * iqr
    filtered_weights = weights[(weights >= lower_bound) & (weights <= upper_bound)]
    if filtered_weights.size == 0:
        raise ValueError("No cargo weights left after outlier removal; cannot derive weight bins.")

    # Use percentile-based bins to ensure even distribution of data
    percentiles = np.linspace(0, 100, num_bins + 1)
    # Round to the nearest 10 for readability; np.unique sorts and merges equal edges.
    bin_edges = np.unique(np.round(np.percentile(filtered_weights, percentiles), -1))

    if bin_edges[0] > 0:
        bin_edges = np.insert(bin_edges, 0, 0)
    # The edges are finite here, so the open-ended top bin is always appended.
    bin_edges = np.append(bin_edges, float('inf'))

    bounds = list(zip(bin_edges[:-1], bin_edges[1:]))
    last = len(bounds) - 1
    labels = [_weight_bin_label(lower, upper, i == last) for i, (lower, upper) in enumerate(bounds)]

    # Truncating to whole thousands can give two bins the same label; fall back to the
    # exact bounds for those bins only, so unambiguous labels keep their default form.
    ambiguous = {label for label in labels if labels.count(label) > 1}

    weight_dict = {}
    for (lower, upper), label in zip(bounds, labels):
        if label in ambiguous:
            label = f"{int(lower)}-{int(upper)}lb"
        weight_dict[(lower, upper)] = label

    return weight_dict
def calculate_lognormal_params(mean, std):
    """
    Convert a desired mean and standard deviation into lognormal parameters.

    Returns a dict with keys ``'mu'`` and ``'sigma'``. When ``mean`` or ``std`` is zero
    or negative, the defaults ``{'mu': 0, 'sigma': 1}`` are returned.
    """
    # Guard against division by zero and the logarithm of a non-positive mean.
    if mean <= 0 or std <= 0:
        return dict(mu=0, sigma=1)

    # sigma^2 = ln(1 + variance / mean^2)
    spread_squared = math.log(1 + ((std ** 2) / (mean ** 2)))

    # mu = ln(mean) - sigma^2 / 2
    location = math.log(mean) - (spread_squared / 2)

    return dict(mu=location, sigma=math.sqrt(spread_squared))
+ + Args: + weight_dict: Dictionary mapping weight ranges to bin labels + base_durations: Dictionary mapping vehicle classes to base durations (in seconds) + weight_factors: Dictionary mapping vehicle classes to weight factors (seconds per lb) + operation_factors: Nested dictionary mapping vehicle classes and operation types to factors + variability_factors: Dictionary mapping vehicle classes to variability factors + variability_exponent: Exponent for the utility-based variability model (default: 0.7) + + Returns: + Nested model structure + """ + # Build the nested model structure + model = { + "weight_dict": weight_dict, + "sample_func": sample_operation_duration + } + + # First level: Vehicle Class + for vehicle_class in vehicle_classes: + model[vehicle_class] = {} + + # Get the variability factor for this vehicle class + variability = variability_factors[vehicle_class] + + # Second level: Operation Type (fixed missing .items()) + for operation_key, standard_op_type in operation_dict.items(): + model[vehicle_class][standard_op_type] = {} + + # Get operation factor for this combination + op_factor = operation_factors[vehicle_class][standard_op_type] + + # Third level: Weight Bins + for (lower_bound, upper_bound), bin_label in weight_dict.items(): + # Calculate the midpoint of the weight bin for reference + if upper_bound == float('inf'): + midpoint = lower_bound * 1.5 + else: + midpoint = (lower_bound + upper_bound) / 2 + + # Get the base duration for this vehicle class (in seconds) + base = base_durations[vehicle_class] + + # Get the weight factor for this vehicle class (seconds per lb) + weight_factor = weight_factors[vehicle_class] + + # Calculate mean duration using the formula + mean_duration_sec = (base + midpoint * weight_factor) * op_factor + + # Convert to minutes + mean_duration_min = mean_duration_sec / 60 + + # Utility approach: variability is a non-linear function of duration + # Using the configurable exponent parameter + std_duration_min = 
variability * mean_duration_min ** variability_exponent + + # Create the distribution parameters + model[vehicle_class][standard_op_type][bin_label] = { + 'distribution': 'lognormal', # More realistic for durations + 'params': calculate_lognormal_params(mean_duration_min, std_duration_min), + 'mean': mean_duration_min, + 'std': std_duration_min, + 'count': 10, # Add a default count for compatibility + 'bounds': { + 'min': max(1, mean_duration_min - 2.5 * std_duration_min), + 'max': mean_duration_min + 3 * std_duration_min + } + } + + return model + + +def find_closest_bin(weight_lbs, weight_bins, available_bins): + # Find closest bin based on numeric weight value + closest_bin = available_bins[0] + closest_distance = float('inf') + + for bin_name in available_bins: + # Find the bin that would contain this weight + for (lower, upper), label in weight_bins.items(): + if label == bin_name: + # Calculate midpoint of this bin + if upper == float('inf'): + midpoint = lower * 1.5 # Approximate midpoint for highest bin + else: + midpoint = (lower + upper) / 2 + + # Check if this is closer to our target weight + distance = abs(midpoint - weight_lbs) + if distance < closest_distance: + closest_distance = distance + closest_bin = bin_name + break + + return closest_bin + + +def sample_operation_duration(model, vehicle_class, operation_type, weight_lbs, fallback_duration=30): + """ + Sample an operation duration from the model based on vehicle class, operation type, and weight. 
+ """ + # Handle empty model + if not model or vehicle_class not in model or operation_type not in model[vehicle_class]: + return fallback_duration + + # Get weight dictionary + weight_dict = model.get("weight_dict", {}) + if not weight_dict: + return fallback_duration + + # Determine weight bin + weight_bin = get_weight_bin(weight_lbs, weight_dict) + + # If weight bin isn't found in the model for this combination, find closest bin + if weight_bin not in model[vehicle_class][operation_type]: + available_bins = list(model[vehicle_class][operation_type].keys()) + if not available_bins: + return fallback_duration + weight_bin = find_closest_bin(weight_lbs, weight_dict, available_bins) + + # Get the distribution for this combination + distribution = model[vehicle_class][operation_type][weight_bin] + + # Sample a duration using the distribution parameters + return sample_from_distribution(distribution) + + +def process_austin_survey_data(survey_file_path): + """ + Process the Austin survey data to extract parameters for the operation duration model. 
+ + Args: + survey_file_path: Path to the survey data file + + Returns: + Tuple of (vehicle_classes, operation_dict, base_durations, weight_factors, + operation_factors, variability_factors, weight_dict) + """ + print("Loading survey data from:", survey_file_path) + + # Load the survey data + survey_data = pd.read_csv(survey_file_path) + + print("Extracting model parameters from survey data...") + # Print summary of the survey data + print(f"Found {len(survey_data)} valid records in survey data") + print(f"Vehicle classes: {', '.join(survey_data['vehicleClass'].unique())}") + print(f"Activity types: {', '.join(survey_data['activityType'].unique())}") + + operation_dict = { + 'loading': 'Pick up Cargo', + 'unloading': 'Delivery of Cargo' + } + + # Filter data for relevant activity types and vehicle classes + survey_data2 = survey_data[survey_data['activityType'].isin(operation_dict.values())] + survey_data2 = survey_data2[survey_data2['vehicleClass'].isin(vehicle_classes)].copy() + + # Use the extract_weight_bins function to get data-driven weight bins + weight_dict = extract_weight_bins(survey_data2, operation_dict) + + # Print the extracted weight bins + print("\nExtracted weight bins:") + for (lower, upper), label in sorted(weight_dict.items(), key=lambda x: x[0][0]): + print(f" {label}: {lower} to {upper} lbs") + + # Extract base durations + base_durations = get_base_duration(survey_data2, weight_dict) + print("\nExtracted base durations (seconds):") + for vc, duration in base_durations.items(): + print(f" {vc}: {duration} seconds") + + # Extract weight factors + weight_factors = get_weight_factor(survey_data2, base_durations, operation_dict) + print("\nExtracted weight factors (seconds per lb):") + for vc, factor in weight_factors.items(): + print(f" {vc}: {factor:.6f} seconds per lb") + + # Extract operation factors + operation_factors = get_operation_factor(survey_data2, operation_dict) + print("\nExtracted operation factors:") + for vc, factors in 
operation_factors.items(): + print(f" {vc}: loading={factors['loading']:.2f}, unloading={factors['unloading']:.2f}") + + # Calculate variability factors from the data + variability_factors = {} + for vehicle_class in vehicle_classes: + # Filter data for this vehicle class + class_data = survey_data2[survey_data2['vehicleClass'] == vehicle_class] + # Calculate coefficient of variation (std/mean) + cv = class_data['operationDurationInMin'].std() / class_data['operationDurationInMin'].mean() + # Adjust CV to work with our utility model + variability_factors[vehicle_class] = min(0.5, max(0.1, cv)) + + print("\nCalculated variability factors:") + for vc, factor in variability_factors.items(): + print(f" {vc}: {factor:.2f}") + + return operation_dict, weight_dict, base_durations, weight_factors, operation_factors, variability_factors + + +def update_operation_duration(study_area_config, payloads, tours, carriers, vehicle_types): + """ + Update operation durations based on factors extracted from survey data. 
+ """ + survey_file_path = os.path.join(study_area_config["work_dir"], + study_area_config["freight"]["stops_data"]) + + # Create and load the model + duration_model = SimpleStopDurationModel() + duration_model.load_survey_data(survey_file_path) + + # # Build the model from Austin survey data + # duration_model = build_operation_duration_model_from_austin_survey(survey_file_path) + + # Print model summary + duration_model.print_summary() + + # Create a copy to avoid modifying the original DataFrame + updated_payloads = payloads.copy() + + # Merge tours with carriers and vehicle_types to get vehicle information + tours_with_vehicle = tours.merge( + carriers, + on='tourId', + how='left' + ) + + # Now merge with vehicle_types + tours_with_vehicle = tours_with_vehicle.merge( + vehicle_types, + on='vehicleTypeId', + how='left' + ) + + tours_with_vehicle = tours_with_vehicle.drop_duplicates(subset=['tourId', 'vehicleTypeId'], keep='first') + + # Then, merge payloads with the combined tours/vehicle data to get vehicle info for each payload + payload_with_vehicle = updated_payloads.merge( + tours_with_vehicle[['tourId', 'vehicleCategory']], + on='tourId', + how='left' + ) + + # Calculate updated durations + def calculate_duration(row): + # Sample from the model + duration_min = duration_model.sample_duration( + row['vehicleCategory'], + row['requestType'], + row['weightInKg'] * 2.20462, # Convert kg to lbs + randomize_factor=0 + ) + # Convert to seconds + return duration_min * 60 + + # Apply the calculation to each row + payload_with_vehicle['operationDurationInSec'] = payload_with_vehicle.apply(calculate_duration, axis=1) + + updated_columns = payloads.columns.tolist() + return payload_with_vehicle[updated_columns] + + +class OperationDurationModel: + def __init__(self, operation_dict, weight_dict, base_durations, weight_factors, operation_factors, + variability_factors, variability_exponent=0.7): + """ + Initialize the operation duration model. 
+ + Args: + operation_dict: Dictionary mapping operation keys to operation types + weight_dict: Dictionary mapping weight ranges to bin labels + base_durations: Dictionary mapping vehicle classes to base durations (in seconds) + weight_factors: Dictionary mapping vehicle classes to weight factors (seconds per lb) + operation_factors: Nested dictionary mapping vehicle classes and operation types to factors + variability_factors: Dictionary mapping vehicle classes to variability factors + variability_exponent: Exponent for the utility-based variability model (default: 0.7) + """ + self.operation_dict = operation_dict + self.weight_bins = weight_dict + self.base_durations = base_durations + self.weight_factors = weight_factors + self.operation_factors = operation_factors + self.variability_factors = variability_factors + self.variability_exponent = variability_exponent + + # Build the model structure + self.model = self._build_model() + + def _build_model(self): + """Build the nested model structure from the provided factors.""" + model = {} + + # First level: Vehicle Class + for vehicle_class in vehicle_classes: + model[vehicle_class] = {} + + # Get the variability factor for this vehicle class + variability = self.variability_factors[vehicle_class] + + # Second level: Operation Type + for operation_key, standard_op_type in self.operation_dict.items(): + model[vehicle_class][operation_key] = {} + + # Get operation factor for this combination + op_factor = self.operation_factors[vehicle_class][operation_key] + + # Third level: Weight Bins + for (lower_bound, upper_bound), bin_label in self.weight_bins.items(): + # Calculate the midpoint of the weight bin for reference + if upper_bound == float('inf'): + midpoint = lower_bound * 1.5 + else: + midpoint = (lower_bound + upper_bound) / 2 + + # Get the base duration for this vehicle class (in seconds) + base = self.base_durations[vehicle_class] + + # Get the weight factor for this vehicle class (seconds per lb) + 
weight_factor = self.weight_factors[vehicle_class] + + # Calculate mean duration using the formula + mean_duration_sec = (base + midpoint * weight_factor) * op_factor + + # Convert to minutes + mean_duration_min = mean_duration_sec / 60 + + # Utility approach: variability is a non-linear function of duration + std_duration_min = variability * mean_duration_min ** self.variability_exponent + + # Create the distribution parameters + model[vehicle_class][operation_key][bin_label] = { + 'distribution': 'lognormal', + 'params': calculate_lognormal_params(mean_duration_min, std_duration_min), + 'mean': mean_duration_min, + 'std': std_duration_min, + 'count': 10, + 'bounds': { + 'min': max(1, mean_duration_min - 2.5 * std_duration_min), + 'max': mean_duration_min + 3 * std_duration_min + } + } + + return model + + + def sample_operation_duration(self, vehicle_class, operation_type, weight_lbs, fallback_duration=30): + """ + Sample an operation duration from the model based on vehicle class, operation type, and weight. 
+ + Args: + vehicle_class: The vehicle class (e.g., 'Class456Vocational') + operation_type: Either 'loading' or 'unloading' + weight_lbs: The weight in pounds (lbs) + fallback_duration: Default duration if sampling fails + + Returns: + Duration in minutes + """ + # Handle empty model + if not self.model or vehicle_class not in self.model or operation_type not in self.model[vehicle_class]: + return fallback_duration + + # Determine weight bin + weight_bin = get_weight_bin(weight_lbs, self.weight_bins) + + # If weight bin isn't found in the model for this combination, find closest bin + if weight_bin not in self.model[vehicle_class][operation_type]: + available_bins = list(self.model[vehicle_class][operation_type].keys()) + if not available_bins: + return fallback_duration + weight_bin = find_closest_bin(weight_lbs, self.weight_bins, available_bins) + + + # Get the distribution for this combination + distribution = self.model[vehicle_class][operation_type][weight_bin] + + # Sample a duration using the distribution parameters + return sample_from_distribution(distribution) + + def print_summary(self): + """Print a summary of the model statistics.""" + print("Operation Duration Model Summary:") + for vehicle_class in self.model: + print(f"\nVehicle Class: {vehicle_class}") + for op_type in self.model[vehicle_class]: + print(f" Operation Type: {op_type}") + for weight_bin in self.model[vehicle_class][op_type]: + stats = self.model[vehicle_class][op_type][weight_bin] + print(f" {weight_bin}: mean={stats['mean']:.1f}min, std={stats['std']:.1f}min") + + +class SimpleStopDurationModel: + """ + A simplified decision tree model for stop durations based on: + - Vehicle class + - Operation type (loading/unloading) + - Weight bins + + The model samples actual durations from the survey data and adds randomness. 
+ """ + + def __init__(self): + """Initialize the model with empty structure.""" + # Main structure to hold the decision tree + self.duration_tree = defaultdict( + lambda: defaultdict( + lambda: defaultdict(list) + ) + ) + + # Weight bins dictionary + self.weight_bins = {} + + # Operation type mapping + self.operation_dict = { + 'loading': 'Pick up Cargo', + 'unloading': 'Delivery of Cargo' + } + + # For reporting statistics + self.stats = {} + + def load_survey_data(self, survey_file_path): + """ + Load the Austin survey data and organize it into the decision tree. + + Args: + survey_file_path: Path to the survey data CSV file + """ + print(f"Loading survey data from: {survey_file_path}") + + # Load the survey data + survey_data = pd.read_csv(survey_file_path) + + # Print summary of the survey data + print(f"Found {len(survey_data)} records in survey data") + print(f"Vehicle classes: {', '.join(survey_data['vehicleClass'].unique())}") + print(f"Activity types: {', '.join(survey_data['activityType'].unique())}") + + # Filter data for relevant activity types and vehicle classes + filtered_data = survey_data[ + survey_data['activityType'].isin(self.operation_dict.values()) & + survey_data['vehicleClass'].isin(vehicle_classes) + ].copy() + + print(f"Filtered to {len(filtered_data)} relevant records") + + self.weight_bins = extract_weight_bins(filtered_data, self.operation_dict) + + # Build the decision tree from the filtered data + self._build_decision_tree(filtered_data) + + # Calculate statistics for reporting + self._calculate_statistics() + + return self + + def _build_decision_tree(self, data): + """ + Build the decision tree from the filtered data. 
+ + Args: + data: Filtered DataFrame with survey data + """ + # Process each row in the data + for _, row in data.iterrows(): + # Get the vehicle class + vehicle_class = row['vehicleClass'] + + # Get the operation type + activity = row['activityType'] + operation_type = 'loading' if activity == self.operation_dict['loading'] else 'unloading' + + # Get the weight and determine its bin + weight = row['cargoWeightPU'] if operation_type == 'loading' else row['cargoWeightDO'] + + weight_bin = get_weight_bin(weight, self.weight_bins) + + # Get the duration + duration = row['operationDurationInMin'] + + # Add the duration to the appropriate leaf in the tree + self.duration_tree[vehicle_class][operation_type][weight_bin].append(duration) + + def _calculate_statistics(self): + """Calculate and store statistics for each leaf in the tree.""" + for vehicle_class in self.duration_tree: + self.stats[vehicle_class] = {} + + for operation_type in self.duration_tree[vehicle_class]: + self.stats[vehicle_class][operation_type] = {} + + for weight_bin in self.duration_tree[vehicle_class][operation_type]: + durations = self.duration_tree[vehicle_class][operation_type][weight_bin] + + if durations: + self.stats[vehicle_class][operation_type][weight_bin] = { + 'count': len(durations), + 'min': min(durations), + 'max': max(durations), + 'mean': sum(durations) / len(durations), + 'median': sorted(durations)[len(durations) // 2], + 'std': np.std(durations) if len(durations) > 1 else 0 + } + else: + # Empty leaf + self.stats[vehicle_class][operation_type][weight_bin] = { + 'count': 0, + 'min': 0, + 'max': 0, + 'mean': 0, + 'median': 0, + 'std': 0 + } + + + def sample_duration(self, vehicle_class, operation_type, weight_lbs, randomize_factor=1.0, fallback_duration=30): + """ + Sample a duration for the given vehicle class, operation type, and weight. 
+ + Args: + vehicle_class: The vehicle class + operation_type: The operation type (loading/unloading) + weight_lbs: The weight in pounds + randomize_factor: Whether to add randomness to the sampled duration + + Returns: + The sampled duration in minutes + """ + # Get the weight bin + weight_bin = get_weight_bin(weight_lbs, self.weight_bins) + + # If the bin has no data, find the closest bin with data + if weight_bin not in self.duration_tree[vehicle_class][operation_type] or not \ + self.duration_tree[vehicle_class][operation_type][weight_bin]: + available_bins = list(self.duration_tree[vehicle_class][operation_type].keys()) + if not available_bins: + return fallback_duration + weight_bin = find_closest_bin(weight_lbs, self.weight_bins, available_bins) + + # Get the durations for this leaf + durations = self.duration_tree[vehicle_class][operation_type][weight_bin] + + # Sample a random duration from the available ones + duration = random.choice(durations) + + # Add randomness based on the randomize_factor + if randomize_factor > 0: + # Calculate standard deviation of durations in this bin + std = np.std(durations) if len(durations) > 1 else duration * 0.2 + + # Scale the standard deviation by the randomize_factor + scaled_std = std * randomize_factor * 0.5 + + # Add Gaussian noise, but ensure the duration remains positive + randomized_duration = max(1, duration + random.gauss(0, scaled_std)) + return randomized_duration + else: + return duration + + def print_summary(self): + """Print a summary of the model statistics.""" + print("\nSimple Stop Duration Model Summary:") + + for vehicle_class in sorted(self.stats.keys()): + print(f"\nVehicle Class: {vehicle_class}") + + for operation_type in sorted(self.stats[vehicle_class].keys()): + print(f" Operation Type: {operation_type}") + + for weight_bin in sorted(self.stats[vehicle_class][operation_type].keys(), + key=lambda x: next( + (lower for (lower, upper), label in self.weight_bins.items() if + label == x), 0)): + 
stats = self.stats[vehicle_class][operation_type][weight_bin] + print(f" {weight_bin}: " + f"n={stats['count']}, " + f"mean={stats['mean']:.1f}min, " + f"std={stats['std']:.1f}min, " + f"range=[{stats['min']:.1f}-{stats['max']:.1f}]") + + +if __name__ == '__main__': + STUDY_AREA_CONFIG = get_area_config("sfbay") + work_dir = STUDY_AREA_CONFIG["work_dir"] + scenario_config = STUDY_AREA_CONFIG["freight"]["2018_Baseline"] + + _payload_plans = pd.read_csv(str(os.path.join(work_dir, scenario_config["payloads_file"]))) + _tours = pd.read_csv(str(os.path.join(work_dir, scenario_config["tours_file"]))) + _carriers = pd.read_csv(str(os.path.join(work_dir, scenario_config["carriers_file"]))) + _vehicle_types = pd.read_csv(str(os.path.join(work_dir, scenario_config["ft_vehicle_types_file"]))) + + _payload_plans["operationDurationInSecOG"] = _payload_plans["operationDurationInSec"] + _payload_plans = update_operation_duration(STUDY_AREA_CONFIG, _payload_plans, _tours, _carriers, _vehicle_types) + _payload_plans.to_csv("outputs/payloads_test.csv", index=False) + + diff --git a/src/main/python/freight/freight_events_filtering.py b/src/main/python/freight/freight_events_filtering.py new file mode 100644 index 00000000000..276b9798c44 --- /dev/null +++ b/src/main/python/freight/freight_events_filtering.py @@ -0,0 +1,51 @@ +import pandas as pd +import os +import sys + +city = "austin" +# city = "sfbay" +scenario = "parking-sensitivity" +# batch = "Oct30/" +batch = "2018_unlimited/" +iteration = 0 +prefix = "" +filename = prefix+str(iteration)+'.events.csv.gz' +local_work_directory = '~/Workspace/Data/FREIGHT/'+city+'/beam/runs/'+scenario+'/'+batch +full_filename = os.path.expanduser(local_work_directory + filename) + +if len(sys.argv) >= 2: + full_filename = str(sys.argv[1]) + +def print2(msg): + with open(full_filename + ".out", 'w') as f: + print(msg) + print(msg, file=f) + +dirname = os.path.dirname(full_filename) +basename = os.path.basename(full_filename) + +compression = None 
+if basename.endswith(".gz"): + compression = 'gzip' + + +print2("reading: " + full_filename) +data = pd.read_csv(full_filename, sep=",", index_col=None, header=0, compression=compression) +print2(data.type.unique()) +data_filtered = data.loc[data.type.isin(["PathTraversal", "actstart", "actend"])] +print2(data_filtered.type.unique()) +data_filtered = data_filtered.loc[data_filtered.vehicle.str.startswith("freight", na=True)] +print2(data_filtered.type.unique()) +data_filtered2 = data_filtered.loc[data_filtered.actType.isin(["Warehouse", "Unloading", "Loading"]) | data_filtered.actType.isnull()] +print2(data_filtered.type.unique()) +# data_filtered2 = data_filtered[ +# ["time","type","vehicleType","vehicle","secondaryFuelLevel", +# "primaryFuelLevel","driver","mode","seatingCapacity","startX", +# "startY", "endX", "endY", "capacity", "arrivalTime", "departureTime", +# "secondaryFuel", "secondaryFuelType", "primaryFuelType", +# "numPassengers", "length", "primaryFuel", "actType", "fuel", "person", +# "locationY", "locationX", "duration", "chargingPointType", "parkingType", "parkingTaz"] +# ] +print2("writing to " + dirname + "/" + "filtered." + basename) +data_filtered2.to_csv(dirname + "/" + "filtered." 
+ basename) +print2("END") diff --git a/src/main/python/freight/freight_events_processing.py b/src/main/python/freight/freight_events_processing.py index e96cabd5d31..fa77f1d470e 100644 --- a/src/main/python/freight/freight_events_processing.py +++ b/src/main/python/freight/freight_events_processing.py @@ -7,40 +7,50 @@ ## Main # city, batch, scenario, sample = "sfbay", "baseline", "2018", 0.1 -city, batch, scenario, sample = "sfbay", "2024-08-07", "2018_Baseline", 0.1 +# city, batch, scenario, sample = "sfbay", "2024-08-07", "2018_Baseline", 0.1 +city, batch, scenario, run, sample = "seattle", "2024-04-20", "2018_Baseline", "2018_Baseline_RPS", 0.1 +# city, batch, scenario, run, sample = "seattle", "2024-04-20", "2018_Baseline", "2018_Baseline", 0.1 # city, batch, scenario, sample = "seattle", "2024-09-24", "2018_Baseline", 0.1 # city, batch, scenario, sample = "seattle", "2024-04-20", "2018_Baseline", 0.3 # work_dir = os.path.expanduser(f"/Volumes/HG40/Workspace/Simulation/{city}") work_dir = os.path.expanduser(f"~/Workspace/Simulation/{city}") +run_directory = f'{work_dir}/beam-runs/{batch}/{run}/' +scenario_directory = f'{work_dir}/beam-freight/{batch}/{scenario}/' events_filename = f"0.events.csv.gz" linkstats_filename = f"0.linkstats.csv.gz" +scenario_label = scenario.replace("_", "-") +run_label = run.replace("_", "-") +batch_label = batch.replace("-", "") # pd.set_option('display.max_columns',10) scale_up_factor = 1 / sample def main(): - setup_logging(f'{work_dir}/beam-runs/{batch}/{scenario}/freight_events_processing.log') - log_and_print(f"Run: {city}/{batch}/{scenario}/{sample}") - scenario_dir = os.path.join(work_dir, "beam-freight", batch, scenario) + setup_logging(f'{run_directory}/freight_events_processing_{run}.log') - linkstats_file = os.path.join(get_local_work_directory(scenario), linkstats_filename) + linkstats_file = os.path.join(run_directory, linkstats_filename) if os.path.exists(linkstats_file): linkstats_df = pd.read_csv(linkstats_file) - 
calc_vmt_from_linkstats(linkstats_df, scenario) + calc_vmt_from_linkstats(linkstats_df) - scenario_label = scenario.replace("_", "-") - batch_label = batch.replace("-", "") - carrier_df = pd.read_csv(os.path.join(scenario_dir, f"carriers--{scenario_label}.csv")) - tour_df = pd.read_csv(os.path.join(scenario_dir, f"tours--{scenario_label}.csv")) - payload_df = pd.read_csv(os.path.join(scenario_dir, f"payloads--{scenario_label}.csv")) + carrier_df = pd.read_csv(os.path.join(scenario_directory, f"carriers--{scenario_label}.csv")) + tour_df = pd.read_csv(os.path.join(scenario_directory, f"tours--{scenario_label}.csv")) + payload_df = pd.read_csv(os.path.join(scenario_directory, f"payloads--{scenario_label}.csv")) vehicle_types = pd.read_csv( - os.path.join(scenario_dir, "vehicle-tech", f"ft-vehicletypes--{batch_label}--{scenario_label}.csv")) + os.path.join(scenario_directory, "vehicle-tech", f"ft-vehicletypes--{batch_label}--{scenario_label}.csv")) vehicle_types_combined = merge_vehicle_types(vehicle_types, carrier_df, tour_df) - processed_event = process_events(scenario, vehicle_types_combined) - calc_vmt_from_events(processed_event, scenario) + + # Process events and get the processed events dataframe + processed_events = process_events(vehicle_types_combined) + + # Compare events with payloads and analyze missing tours + compare_events_and_payloads(processed_events, payload_df, scenario_directory, run, scenario_label) + + # Calculate VMT from events + calc_vmt_from_events(processed_events) log_and_print(f"[FRISM] Total number of vehicles: {int(len(carrier_df['vehicleId'].unique()) * scale_up_factor)}") - trips_df = convert_payload_to_trips(payload_df, scenario) + trips_df = convert_payload_to_trips(payload_df) summary = trips_by_vehicle_class(trips_df, carrier_df, vehicle_types) log_and_print("END") @@ -67,12 +77,12 @@ def merge_vehicle_types(vehicle_types: pd.DataFrame, carrier_df: pd.DataFrame, t return result2_df -def process_events(scenario, 
vehicle_types_combined): - events_filepath = os.path.join(get_local_work_directory(scenario), events_filename) - processed_events_filepath = os.path.join(get_local_work_directory(scenario), f"updated.filtered.{events_filename}") +def process_events(vehicle_types_combined): + events_filepath = os.path.join(run_directory, events_filename) + processed_events_filepath = os.path.join(run_directory, f"updated.filtered.{events_filename}") if not os.path.exists(processed_events_filepath): - events = read_events_file(events_filepath, scenario) + events = read_events_file(events_filepath, run) vehicle_types_combined['vehicleId'] = 'freightVehicle-' + vehicle_types_combined['vehicleId'].astype( str) processed_event_updated = pd.merge( @@ -152,7 +162,7 @@ def read_events_file(full_filename, run_name): return processed_events -def calc_vmt_from_linkstats(linkstats_df, scenario): +def calc_vmt_from_linkstats(linkstats_df): freight_classes = ["Class2b3Vocational", "Class456Vocational", "Class78Vocational", "Class78Tractor"] required_columns = ['length', 'hour'] + [f'volume_{col}' for col in freight_classes] missing_columns = [col for col in required_columns if col not in linkstats_df.columns] @@ -169,7 +179,7 @@ def calc_vmt_from_linkstats(linkstats_df, scenario): total_vmt = linkstats_df[ [f'vmt_{col}' for col in freight_classes]].sum().sum() * scale_up_factor / 1_000_000 log_and_print( - f"[BEAM] Total VMT from LinkStats for scenario {batch}/{scenario}: {total_vmt:.2f} million miles") + f"[BEAM] Total VMT from LinkStats ({batch}/{run}): {total_vmt:.2f} million miles") vmt_by_hour = linkstats_df.groupby('hour')[ [f'vmt_{col}' for col in freight_classes]].sum() * scale_up_factor / 1_000_000 @@ -191,14 +201,14 @@ def calc_vmt_from_linkstats(linkstats_df, scenario): plt.tight_layout() - plot_filename = os.path.join(get_local_work_directory(scenario), f"vmt_by_hour_category_{scenario}.png") + plot_filename = os.path.join(run_directory, f"vmt_by_hour_category_{run}.png") 
plt.savefig(plot_filename, bbox_inches='tight') # log_and_print(f"Bar plot saved as {plot_filename}") return vmt_by_hour -def calc_vmt_from_events(events, scenario): +def calc_vmt_from_events(events): # Filter for PathTraversal events and freight vehicles pt = events[ (events['type'] == 'PathTraversal') & @@ -212,10 +222,6 @@ def calc_vmt_from_events(events, scenario): log_and_print(f"This is a bug. Number of emergency vehicles found: {len(emergency_vehicles)}", logging.ERROR) log_and_print(f"Sample of emergency vehicles: {emergency_vehicles['vehicle'].head()}", logging.ERROR) - # log_and_print(f"powertrains: {pt["primaryFuelType"].unique()}") - # log_and_print(f"vehicletypes: {pt["vehicleType"].unique()}") - # Calculate total VMT - log_and_print( f"[BEAM] Total number of vehicles: {int(len(pt['vehicle'].unique()) * scale_up_factor)}") log_and_print( @@ -224,14 +230,33 @@ def calc_vmt_from_events(events, scenario): log_and_print( f"[BEAM] Total VMT: {total_vmt_million_miles:.2f} million miles") - vmt_by_category = pt.groupby(['business', 'vehicleCategory'])[ - 'length'].sum() * scale_up_factor / 1609.34 / 1_000_000 + # Calculate VMT by business + vmt_by_business = pt.groupby('business')['length'].sum() * scale_up_factor / 1609.34 / 1_000_000 + vmt_by_business = vmt_by_business.round(2) + business_table = pd.DataFrame({ + 'Business': vmt_by_business.index, + 'VMT (Million Miles)': vmt_by_business.values + }) + log_and_print("[BEAM] VMT by Business:" + business_table.to_string(index=False, float_format=lambda x: '%.2f' % x)) + + # Calculate VMT by vehicle category + vmt_by_category = pt.groupby('vehicleCategory')['length'].sum() * scale_up_factor / 1609.34 / 1_000_000 + vmt_by_category = vmt_by_category.round(2) + category_table = pd.DataFrame({ + 'Vehicle Category': vmt_by_category.index, + 'VMT (Million Miles)': vmt_by_category.values + }) + log_and_print( + "[BEAM] VMT by Vehicle Category:" + category_table.to_string(index=False, float_format=lambda x: '%.2f' % 
x)) - vmt_by_category = vmt_by_category.unstack(level='business') + # Create stacked bar plot + vmt_by_business_category = pt.groupby(['business', 'vehicleCategory'])[ + 'length'].sum() * scale_up_factor / 1609.34 / 1_000_000 + vmt_by_business_category = vmt_by_business_category.unstack(level='business') # Create bar plot - ax = vmt_by_category.plot(kind='bar', figsize=(12, 6), width=0.8) - plt.title(f'VMT by Business and Vehicle Category for {scenario}') + ax = vmt_by_business_category.plot(kind='bar', figsize=(12, 6), width=0.8) + plt.title(f'VMT by Business and Vehicle Category: ({batch}/{run})') plt.xlabel('Vehicle Category') plt.ylabel('VMT (Million Miles)') plt.legend(title='Business') @@ -242,15 +267,14 @@ def calc_vmt_from_events(events, scenario): ax.bar_label(container, fmt='%.2f', padding=3) plt.tight_layout() - png_output = os.path.join(get_local_work_directory(scenario), f"vmt_by_category_{scenario}.png") + png_output = os.path.join(run_directory, f"vmt_by_category_{run}.png") plt.savefig(png_output) return pt -def convert_payload_to_trips(payload_df, scenario): - output_file_path = os.path.join(work_dir, 'beam-freight', batch, scenario, - f'trips--{scenario.replace("_", "-")}.csv') +def convert_payload_to_trips(payload_df): + output_file_path = os.path.join(scenario_directory, f'trips--{scenario_label}.csv') # log_and_print(f"[FRISM] Total rows in payloads: {len(payload_df)}") # Count payloads per tour @@ -379,9 +403,30 @@ def trips_by_vehicle_class(trips_df, carriers_df, vehicle_types_df): return summary -def get_local_work_directory(scenario): - local_work_directory = f'{work_dir}/beam-runs/{batch}/{scenario}/' - return local_work_directory +def compare_events_and_payloads(events_df, payload_df, scenario_directory, run, scenario_label): + """ + Compare tours in events file with payload file and create a new payload file with missing tours. 
+ """ + # Extract unique tourIds from events and payloads + events_tour_ids = set(events_df[events_df['type'] == 'PathTraversal']['tourId'].unique()) + payload_tour_ids = set(payload_df['tourId'].unique()) + + # Also check tours in payload but not in events + missing_from_events = payload_tour_ids - events_tour_ids + + # Log summary statistics + log_and_print(f"\n[ANALYSIS] Tour comparison summary:") + log_and_print(f"Tours in events file: {len(events_tour_ids):,}") + log_and_print(f"Tours in payload file: {len(payload_tour_ids):,}") + log_and_print(f"Tours missing from events: {len(missing_from_events):,}") + + if len(missing_from_events) > 0: + log_and_print( + f"\n[WARNING] Found {len(missing_from_events)} tours in payload file that are not in events file!") + payload_filtered_df = payload_df[payload_df['tourId'].isin(missing_from_events)].copy() + missing_events_file = os.path.join(run_directory, f"payloads--{run_label}--tours-missing-in-events.csv") + payload_filtered_df.to_csv(missing_events_file, index=False) + log_and_print(f"List of payloads which tours are missing from events saved to: {missing_events_file}") def determine_powertrain(row): @@ -427,6 +472,7 @@ def setup_logging(log_file): format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.FileHandler(log_file, mode='w'), logging.StreamHandler()]) + log_and_print(f"Run >> city:{city}, batch:{batch}, scenario:{scenario}, run:{run}, sample:{sample}") def log_and_print(message, level=logging.INFO): diff --git a/src/main/python/freight/freight_parking_events.py b/src/main/python/freight/freight_parking_events.py new file mode 100644 index 00000000000..a245f7829e9 --- /dev/null +++ b/src/main/python/freight/freight_parking_events.py @@ -0,0 +1,75 @@ +import pandas as pd +import os +import sys +import utils + +workspace = '~/Workspace/Data/FREIGHT/' +city = "austin" # "sfbay" +scenario = "parking-sensitivity" +batch = "2018_unlimited/" + +events_file = 
utils.construct_events_file_path(workspace, city, scenario, batch) +if len(sys.argv) >= 2: + events_file = str(sys.argv[1]) +dir_name = os.path.dirname(events_file) +basename = os.path.basename(events_file) +log_file = dir_name + "/log." + basename + +## ***************************************************** + +file_to_read = dir_name + "/park." + basename +if not os.path.exists(file_to_read): + file_to_read = dir_name + "/" + basename + +utils.print2(log_file, "reading: " + file_to_read) +#ParkingEvent, LeavingParkingEvent, ChargingPlugInEvent, ChargingPlugOutEvent, RefuelSessionEvent +data = utils.read_csv_in_chunks(file_to_read) +#data = utils.read_csv(dir_name + "/park." + basename) +utils.print2(log_file, "Read... " + str(data.type.unique())) + +# filtering +data_filtered = data.loc[data.type.isin(["ParkingEvent", "LeavingParkingEvent", "ChargingPlugInEvent", "ChargingPlugOutEvent", "RefuelSessionEvent"])] +data_filtered2 = data_filtered.loc[data_filtered.vehicle.str.startswith("carrier", na=False)] +utils.print2(log_file, data_filtered2.vehicleType.unique()) + +# saving +file_to_write = dir_name + "/park." 
+ basename +data_filtered2.to_csv(file_to_write) +utils.print2(log_file, "writing to " + file_to_write) + +# second filtering +data_filtered3 = data_filtered2.loc[data.type.isin(["ParkingEvent", "LeavingParkingEvent", "ChargingPlugInEvent", "ChargingPlugOutEvent"])] +utils.print2(log_file, "Filtered and now counting number of vehicles parked at each parking zone") +# parkingTaz, parkingZoneId + +# Mapping for event to increment/decrement values +import numpy as np +event_mapping = { + 'ParkingEvent': (1, 0), + 'LeavingParkingEvent': (-1, 0), + 'ChargingPlugInEvent': (0, 1), + 'ChargingPlugOutEvent': (0, -1), +} + +# Map the type column to increment/decrement values +#data_filtered3['EventCounts'] = data_filtered3['type'].map(event_mapping) +data_filtered3['numVehicles'] = 0 +data_filtered3['numChargingVehicles'] = 0 +df_mapped = data_filtered3['type'].map(event_mapping) +df_mapped2 = pd.DataFrame(df_mapped.tolist(), columns=['numVehicles', 'numChargingVehicles'], index=data_filtered3.index) +data_filtered3.update(df_mapped2) +print(data_filtered3) +for col in ['numVehicles', 'numChargingVehicles']: + data_filtered3[col] = data_filtered3.groupby(['parkingTaz', 'parkingZoneId'])[col].fillna(0).cumsum() +print(data_filtered3) +#data_filtered3[['numVehicles', 'numChargingVehicles']] = data_filtered3['type'].map(event_mapping) +#data_filtered3[['numVehicles', 'numChargingVehicles']] = data_filtered3.groupby(['parkingTaz', 'parkingZoneId'])[['numVehicles', 'numChargingVehicles']].cumsum() + +# Use groupby with cumsum to get the cumulative sum for each group +# for col in ['numVehicles', 'numChargingVehicles']: +# data_filtered3[col] = 0 +# data_filtered3[col] = data_filtered3.groupby(['parkingTaz', 'parkingZoneId'])[col].cumsum() +# data_filtered2[['numVehicles', 'numChargingVehicles']] = data_filtered2['type'].map(event_mapping) + +data_filtered3.to_csv(dir_name + "/" + "park2." 
+ basename) +utils.print2(log_file, "END") diff --git a/src/main/python/freight/frism_to_beam_freight_plans.py b/src/main/python/freight/frism_to_beam_freight_plans.py index e5ecc42d784..c5f2f6fe79c 100644 --- a/src/main/python/freight/frism_to_beam_freight_plans.py +++ b/src/main/python/freight/frism_to_beam_freight_plans.py @@ -1,40 +1,211 @@ -import pandas as pd +import multiprocessing as mp import os +import random +import sys +import warnings +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path -import numpy as np +from typing import Tuple, List -primary_energy_files = { - "freight-md-D-Diesel-Baseline": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(Diesel,_2020,_no_program).csv", - "freight-md-E-BE-Baseline": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(BEV,_2025,_no_program).csv", - # "freight-md-E-H2FC-Baseline": np.nan, - "freight-md-E-PHEV-Baseline": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(BEV,_2025,_no_program).csv", - "freight-hdt-D-Diesel-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(Diesel,_2020,_no_program).csv", - "freight-hdt-E-BE-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(BEV,_2025,_no_program).csv", - # "freight-hdt-E-H2FC-Baseline": np.nan, - "freight-hdt-E-PHEV-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(BEV,_2025,_no_program).csv", - "freight-hdv-D-Diesel-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(Diesel,_2020,_no_program).csv", - "freight-hdv-E-BE-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(BEV,_2025,_no_program).csv", - # "freight-hdv-E-H2FC-Baseline": np.nan, - "freight-hdv-E-PHEV-Baseline": "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(BEV,_2025,_no_program).csv" -} - -secondary_energy_profile_for_phev = { - # "freight-md-D-Diesel-Baseline": np.nan, - # "freight-md-E-BE-Baseline": np.nan, - # "freight-md-E-H2FC-Baseline": 
np.nan, - "freight-md-E-PHEV-Baseline": ("Diesel", 9595.796035186175, 12000000000000000, - "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(HEV,_2025,_no_program).csv"), - # "freight-hdt-D-Diesel-Baseline": np.nan, - # "freight-hdt-E-BE-Baseline": np.nan, - # "freight-hdt-E-H2FC-Baseline": np.nan, - "freight-hdt-E-PHEV-Baseline": ("Diesel", 13817.086117829229, 12000000000000000, - "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(HEV,_2025,_no_program).csv"), - # "freight-hdv-D-Diesel-Baseline": np.nan, - # "freight-hdv-E-BE-Baseline": np.nan, - # "freight-hdv-E-H2FC-Baseline": np.nan, - "freight-hdv-E-PHEV-Baseline": ("Diesel", 14026.761465378302, 12000000000000000, - "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(HEV,_2025,_no_program).csv") -} +import geopandas as gpd +import numpy as np +import pandas as pd +from pandas import DataFrame +from pyrosm import OSM +from scipy.spatial import cKDTree +from shapely.geometry import Point + +from estimate_stop_duration import update_operation_duration + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) + +# Go up to the parent directory that contains the 'python' directory +# If your file is in /path/to/python/freight/frism_to_beam_freight_plans.py +# This will add /path/to to sys.path +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +# Now use absolute import +from python.utils.study_area_config import get_area_config +from python.utils.study_area_config import generate_network_name +from python.utils.study_area_config import constants + + +warnings.filterwarnings('ignore') + +# ************************************************************************************************ + +AREA = "sfbay" # sfbay +BATCH_NAME = "2024-01-23" +SCENARIO_NAME = "Baseline" +SCENARIO_SUFFIX = "" +FRISM_VERSION = 1.0 +# Coordinate snapping constants +BUFFER_DISTANCE_METERS = 100 # 100 meters 
+MAX_DISTANCE_METERS = 200000 # 200km +STUDY_AREA_CONFIG = get_area_config(AREA) +STUDY_AREA_CONFIG["network"]["graph_layers"]["residential"]["min_density_per_km2"] = 5500 +SNAP_COORDINATES = True + +# ************************************************************************************************ + + + + +# System and general constants +CHUNK_SIZE = 10000 # this affects speed and parallelization of the script +CONFIG_NAME = generate_network_name(STUDY_AREA_CONFIG) +NETWORK_DIR = f'{STUDY_AREA_CONFIG["work_dir"]}/network/{CONFIG_NAME}' +NETWORK_OSM_PBF = f'{NETWORK_DIR}/{CONFIG_NAME}.osm.pbf' +UTM_CRS = STUDY_AREA_CONFIG["geo"]["utm_epsg"] +YEAR = STUDY_AREA_CONFIG["census_year"] +SCENARIO_LABEL = SCENARIO_NAME.replace("_", "") +PRIMARY_ENERGY_PROFILE = STUDY_AREA_CONFIG["fastsim_routee_files"]["primary_powertrain"] +SECONDARY_ENERGY_PROFILE = STUDY_AREA_CONFIG["fastsim_routee_files"]["secondary_powertrain"] + +# File paths and directories +DIRECTORY_INPUT = f'{STUDY_AREA_CONFIG["work_dir"]}/frism/{BATCH_NAME}/{SCENARIO_NAME}' +DIRECTORY_BATCH = f'{STUDY_AREA_CONFIG["work_dir"]}/beam-ft/{BATCH_NAME}' +DIRECTORY_OUTPUT = f'{DIRECTORY_BATCH}/{YEAR}_{SCENARIO_LABEL}{SCENARIO_SUFFIX}' +DIRECTORY_VEHICLE_TECH = f'{STUDY_AREA_CONFIG["work_dir"]}/vehicle-tech' +DIRECTORY_SCENARIO = f'{DIRECTORY_OUTPUT}' +# if SNAP_COORDINATES: +# # Define the snapped directory path +# DIRECTORY_SCENARIO = f'{DIRECTORY_OUTPUT}--snapped-to-{CONFIG_NAME}' +# else: +# DIRECTORY_SCENARIO = f'{DIRECTORY_OUTPUT}' + +# Create necessary directories if they don't exist +Path(DIRECTORY_SCENARIO).mkdir(parents=True, exist_ok=True) +Path(DIRECTORY_VEHICLE_TECH).mkdir(parents=True, exist_ok=True) + +# Variables +_carriers = None +_payload_plans = None +_ondemand_plans = None +_tours = None +_vehicle_types = None +_tourId_with_prefix = {} + +# ****************************** + +def load_osm_network(pbf_path, min_distance_from_edge): + """ + Load OSM network and create/load buffered network with proper 
metric distances + + Args: + pbf_path (str): Path to original OSM PBF file + min_distance_from_edge (float): Buffer distance in meters + + Returns: + gpd.GeoDataFrame: Network edges with original and buffered geometries + + Raises: + ValueError: If the PBF file doesn't exist or if network extraction fails + """ + # Input validation + if not os.path.exists(pbf_path): + raise ValueError(f"PBF file not found: {pbf_path}") + + print(f"Loading OSM network from {pbf_path}...") + try: + osm = OSM(pbf_path) + edges = osm.get_network(network_type="driving") + except Exception as e: + raise ValueError(f"Failed to load OSM network: {str(e)}") + + # Ensure we have a GeoDataFrame + if not isinstance(edges, gpd.GeoDataFrame): + edges = gpd.GeoDataFrame(edges) + + if edges.empty: + raise ValueError("No network edges found in the PBF file") + + print(f"Creating {str(int(BUFFER_DISTANCE_METERS / 1000))}km road buffer...") + # Convert to UTM for proper metric distances + try: + edges_utm = edges.to_crs(epsg=UTM_CRS) + except Exception as e: + raise ValueError(f"Failed to convert to UTM (EPSG:{UTM_CRS}): {str(e)}") + + # Create buffer in UTM coordinates (where distances are in meters) + buffered_edges = edges_utm.copy() + buffered_edges['geometry'] = edges_utm['geometry'].buffer( + min_distance_from_edge, + cap_style=2, # flat ends + join_style=2 # mitered joins + ) + + # Add buffered geometry as a new column + edges_utm['buffered_geometry'] = buffered_edges.geometry + + # Create buffered pbf if it doesn't exist + path_without_ext, ext = os.path.splitext(pbf_path) + buffer_path = f"{path_without_ext}_{str(int(BUFFER_DISTANCE_METERS / 1000))}km_road_buffer.geojson" + + # Check if file exists and handle overwriting + if os.path.exists(buffer_path): + try: + os.remove(buffer_path) + print(f"Removed existing file: {buffer_path}") + except Exception as e: + print(f"Warning: Failed to remove existing file: {str(e)}") + + # Save buffered network + try: + # Save the buffered edges as GeoJSON 
+ # Convert to geographic coordinates (EPSG:4326) for better compatibility + save_gdf = buffered_edges.to_crs(epsg=4326) + + # Make sure all columns are serializable + for col in save_gdf.columns: + if save_gdf[col].dtype == 'object': + save_gdf[col] = save_gdf[col].astype(str) + + # Save to GeoJSON + save_gdf.to_file(buffer_path, driver='GeoJSON') + print(f"Saved buffered network to: {buffer_path}") + except Exception as e: + print(f"Warning: Failed to save buffered network: {str(e)}") + raise e + + return edges_utm + + +def generate_random_point_near_line( + nearest_edge: gpd.GeoSeries, + point_geom: Point, + max_dist_meters: float +) -> Tuple[float, float]: + """ + Generate a random point within max_dist_meters of the nearest point on the road + + Args: + nearest_edge: GeoSeries row containing the road geometry + point_geom: Original point (Shapely Point) + max_dist_meters: Maximum distance from road (e.g., 200) + + Returns: + Tuple of (x, y) coordinates for the new random point + """ + # Find nearest point on the road + proj_distance = nearest_edge.geometry.project(point_geom) + nearest_point = nearest_edge.geometry.interpolate(proj_distance) + + # Generate random angle and distance + angle = random.uniform(0, 2 * np.pi) # Random angle between 0 and 2π + distance = random.uniform(0, max_dist_meters) # Random distance up to max + + # Convert to x,y offset + dx = distance * np.cos(angle) + dy = distance * np.sin(angle) + + # Create new point + new_x = nearest_point.x + dx + new_y = nearest_point.y + dy + + return new_x, new_y def read_csv_file(filename_): @@ -71,180 +242,17 @@ def add_prefix(prefix, column, row, to_num=True, store_dict=None, veh_type=False return new -frism_version = 1.5 -city = "seattle" -scenario_name = "2024-04-20" -year, run_name = "2018", "Baseline" -# year, run_name = "2050", "Ref_highp6" -run_name_label = run_name.replace("_", "") - -# work_dir = os.path.expanduser(f'/Volumes/HG40/Workspace') -work_dir = os.path.expanduser(f'~/Workspace') 
-directory_input = f'{work_dir}/Simulation/{city}/frism/{scenario_name}/{run_name}' -directory_output = f'{work_dir}/Simulation/{city}/beam-freight/{scenario_name}/{year}_{run_name_label}' -Path(directory_output).mkdir(parents=True, exist_ok=True) -directory_vehicle_tech = f'{directory_output}/vehicle-tech' -Path(directory_vehicle_tech).mkdir(parents=True, exist_ok=True) -carriers = None -payload_plans = None -ondemand_plans = None -tours = None -vehicle_types = None -tourId_with_prefix = {} - -for filename in sorted(os.listdir(directory_input)): - filepath = f'{directory_input}/{filename}' - print(filepath) - parts = filename.split('_', 2) - if len(parts) < 3: - print("Warning! could not read file: ", filename) - continue - business_type = parts[0].lower() - county = parts[1].lower() - filetype = parts[2].lower() - - if "carrier" in filetype: - df = pd.read_csv(filepath) - # df['carrierId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'carrierId', row), axis=1) - # df['vehicleId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'vehicleId', row), axis=1) - df['carrierId'] = df.apply(lambda row: add_prefix(f'', 'carrierId', row, False), axis=1).tolist() - df['vehicleTypeId'] = df.apply( - lambda row: add_prefix('', 'vehicleTypeId', row, to_num=True, store_dict=None, veh_type=True, - suffix=f"-{year}-{run_name_label}"), - axis=1).tolist() - df['vehicleId'] = df.apply(lambda row: add_prefix(row['carrierId'] + '-', 'vehicleId', row), axis=1).tolist() - # df['tourId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'tourId', row), axis=1) - df['tourId'] = df.apply(lambda row: add_prefix(f'{business_type}-', 'tourId', row, True, tourId_with_prefix), - axis=1).tolist() - if carriers is None: - carriers = df - else: - carriers = pd.concat([carriers, df]) - elif "freight_tours" in filetype: - df = pd.read_csv(filepath) - # df['tour_id'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'tour_id', row), 
axis=1) - df['tour_id'] = df.apply(lambda row: tourId_with_prefix[str(int(row['tour_id']))], axis=1).tolist() - if tours is None: - tours = df - else: - tours = pd.concat([tours, df]) - elif "payload" in filetype: - df = pd.read_csv(filepath) - if "ondemand" in county: - df['tourId'] = df.apply(lambda row: add_prefix(f'ridehail-', 'tourId', row), axis=1) - if ondemand_plans is None: - ondemand_plans = df - else: - ondemand_plans = pd.concat([ondemand_plans, df]) - else: - df['tourId'] = df.apply(lambda row: tourId_with_prefix[str(int(row['tourId']))], axis=1).tolist() - df['payloadId'] = df.apply(lambda row: add_prefix('', 'payloadId', row, False), axis=1).tolist() - tourId_with_prefix = {} - if payload_plans is None: - payload_plans = df - else: - payload_plans = pd.concat([payload_plans, df]) - elif "vehicle_types" in filename: - df = pd.read_csv(filepath) - empty_vectors = list(np.repeat("", len(df.index))) - # JoulePerMeter = 121300000/(mpgge*1609.34) - vehicle_types_ids = df.apply( - lambda row: add_prefix('', 'veh_type_id', row, to_num=True, store_dict=None, veh_type=True, - suffix=f"-{year}-{run_name_label}"), axis=1).tolist() - vehicles_techs = { - "vehicleTypeId": vehicle_types_ids, - "seatingCapacity": list(np.repeat(1, len(df.index))), - "standingRoomCapacity": list(np.repeat(0, len(df.index))), - "lengthInMeter": list(np.repeat(12, len(df.index))), - "primaryFuelType": df["primary_fuel_type"], - "primaryFuelConsumptionInJoulePerMeter": np.divide(121300000, - np.float64(df["primary_fuel_rate"]) * 1609.34), - "primaryFuelCapacityInJoule": list(np.repeat(12000000000000000, len(df.index))), - "primaryVehicleEnergyFile": [primary_energy_files[id] if id in primary_energy_files else np.nan for id in - vehicle_types_ids], - "secondaryFuelType": [ - secondary_energy_profile_for_phev[id][0] if id in secondary_energy_profile_for_phev else np.nan for id - in vehicle_types_ids], - "secondaryFuelConsumptionInJoulePerMeter": [ - 
secondary_energy_profile_for_phev[id][1] if id in secondary_energy_profile_for_phev else np.nan for id - in vehicle_types_ids], - "secondaryVehicleEnergyFile": [ - secondary_energy_profile_for_phev[id][3] if id in secondary_energy_profile_for_phev else np.nan for id - in vehicle_types_ids], - "secondaryFuelCapacityInJoule": [ - secondary_energy_profile_for_phev[id][2] if id in secondary_energy_profile_for_phev else np.nan for id - in vehicle_types_ids], - "automationLevel": list(np.repeat(1, len(df.index))), - "maxVelocity": df["max_speed(mph)"], # convert to meter per second - "passengerCarUnit": empty_vectors, - "rechargeLevel2RateLimitInWatts": empty_vectors, - "rechargeLevel3RateLimitInWatts": empty_vectors, - "vehicleCategory": list(np.repeat("Class456Vocational", len(df.index))), - "sampleProbabilityWithinCategory": empty_vectors, - "sampleProbabilityString": empty_vectors, - "payloadCapacityInKg": df["payload_capacity_weight"], - "vehicleClass": df["veh_class"] - } - df2 = pd.DataFrame(vehicles_techs) - df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('hdv'), 'Class78Vocational', - df2.vehicleCategory) - df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('hdt'), 'Class78Tractor', - df2.vehicleCategory) - df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('ld'), 'Class2b3Vocational', - df2.vehicleCategory) - if vehicle_types is None: - vehicle_types = df2 - else: - vehicle_types = pd.concat([vehicle_types, df2]) - else: - print(f'SKIPPING {filename}') - -vehicle_types.to_csv( - f'{directory_vehicle_tech}/ft-vehicletypes--{scenario_name.replace("-", "")}--{year}-{run_name_label}.csv', - index=False) - -# In[9]: - - -# carrierId,tourId,vehicleId,vehicleTypeId,warehouseZone,warehouseX,warehouseY,MESOZONE,BoundaryZONE -# carrierId,tourId,vehicleId,vehicleTypeId,warehouseZone,warehouseX,warehouseY,MESOZONE,BoundaryZONE -carriers_renames = { - 'depot_zone': 'warehouseZone', - 'depot_zone_x': 'warehouseX', - 
'depot_zone_y': 'warehouseY', - 'true_depot_zone': 'mesoZone' -} -carriers_drop = ['x', 'y', 'index'] -carriers.rename(columns=carriers_renames, inplace=True) -carriers.drop(carriers_drop, axis=1, inplace=True, errors='ignore') -carriers['warehouseZone'] = carriers['warehouseZone'].astype(int) -carriers.to_csv(f'{directory_output}/carriers--{year}-{run_name_label}.csv', index=False) - -# In[10]: - - -# tourId,departureTimeInSec,departureLocationZone,maxTourDurationInSec,departureLocationX,departureLocationY -# tourId,departureTimeInSec,departureLocationZone,maxTourDurationInSec,departureLocationX,departureLocationY -tours_renames = { - 'tour_id': 'tourId', - 'departureLocation_zone': 'departureLocationZone', - 'departureLocation_x': 'departureLocationX', - 'departureLocation_y': 'departureLocationY', - 'true_depot_zone': 'mesoZone' -} -tours.rename(columns=tours_renames, inplace=True) -tours['departureTimeInSec'] = tours['departureTimeInSec'].astype(int) -tours['maxTourDurationInSec'] = tours['maxTourDurationInSec'].astype(int) -tours['departureLocationZone'] = tours['departureLocationZone'].astype(int) -tours.drop(['index'], axis=1, inplace=True, errors='ignore') -tours.to_csv(f'{directory_output}/tours--{year}-{run_name_label}.csv', index=False) - - -# In[11]: - - -# payloadId,sequenceRank,tourId,payloadType,weightInKg,requestType,locationZone,estimatedTimeOfArrivalInSec,arrivalTimeWindowInSecLower,arrivalTimeWindowInSecUpper,operationDurationInSec,locationX,locationY -def format_payload(_payload): +def format_payload(_payload_plans: pd.DataFrame) -> pd.DataFrame: + """ + Format payload and adjust coordinates where needed using road buffer for efficiency + + Args: + _payload_plans (DataFrame): Input payload data + + Returns: + DataFrame: Formatted payload data + """ + # Rename columns and convert data types payload_plans_renames = { 'arrivalTimeWindowInSec_lower': 'arrivalTimeWindowInSecLower', 'arrivalTimeWindowInSec_upper': 'arrivalTimeWindowInSecUpper', @@ 
-254,10 +262,16 @@ def format_payload(_payload): 'BuyerNAICS': "buyerNAICS", "SellerNAICS": "sellerNAICS" } - payload_plans_drop = ['truck_mode', 'weightInlb', 'cummulativeWeightInlb', 'index'] - int_columns = ['sequenceRank', 'payloadType', 'requestType', 'estimatedTimeOfArrivalInSec', - 'arrivalTimeWindowInSecLower', 'arrivalTimeWindowInSecUpper', - 'operationDurationInSec', 'locationZone'] + _payload_plans.rename(columns=payload_plans_renames, inplace=True) + + int_columns = [ + 'sequenceRank', 'payloadType', 'requestType', 'estimatedTimeOfArrivalInSec', + 'arrivalTimeWindowInSecLower', 'arrivalTimeWindowInSecUpper', + 'operationDurationInSec', 'locationZone' + ] + _payload_plans[int_columns] = _payload_plans[int_columns].astype(int) + + # Map payload types and process weights payload_type_map = { 1: 'bulk', 2: 'fuel_fert', @@ -265,47 +279,568 @@ def format_payload(_payload): 4: 'mfr_goods', 5: 'others' } - - _payload.rename(columns=payload_plans_renames, inplace=True) - - # Convert columns to integer type - for col in int_columns: - _payload[col] = _payload[col].astype(int) - - _payload['payloadType'] = _payload['payloadType'].map(payload_type_map) - # Convert weightInlb to weightInKg without applying abs yet - _payload['weightInKg'] = _payload['weightInlb'].astype(float) * 0.45359237 - - # Apply modifications for frism version > 1.0 - if frism_version > 1.0: - # Create DeliveryType column - _payload['deliveryType'] = _payload['requestType'].map({1: 'delivery-only', 3: 'pickup-delivery'}) - - # Update requestType based on weightInKg - _payload['requestType'] = _payload['requestType'].astype('object') - _payload.loc[_payload['weightInKg'] < 0, 'requestType'] = 'unloading' - _payload.loc[_payload['weightInKg'] >= 0, 'requestType'] = 'loading' - - # Now make weightInKg positive - _payload['weightInKg'] = np.abs(_payload['weightInKg']) - - _payload['fleetType'] = _payload['truck_mode'].map({ + _payload_plans['payloadType'] = 
_payload_plans['payloadType'].map(payload_type_map) + _payload_plans['weightInKg'] = _payload_plans['weightInlb'].astype(float) * 0.45359237 + + # Handle different FRISM versions + if FRISM_VERSION > 1.0: + _payload_plans['deliveryType'] = _payload_plans['requestType'].map({ + 1: 'delivery-only', + 3: 'pickup-delivery' + }) + _payload_plans['requestType'] = _payload_plans['requestType'].astype('object') + _payload_plans.loc[_payload_plans['weightInKg'] < 0, 'requestType'] = 'unloading' + _payload_plans.loc[_payload_plans['weightInKg'] >= 0, 'requestType'] = 'loading' + _payload_plans['weightInKg'] = np.abs(_payload_plans['weightInKg']) + + _payload_plans['fleetType'] = _payload_plans['truck_mode'].map({ 'Private Truck': 'private', 'For-hire Truck': 'for-hire' - }, na_action='ignore') # This keeps NA values as they are + }, na_action='ignore') else: - # For frism version 1.0, just ensure weightInKg is positive - _payload['requestType'] = _payload['requestType'].map({1: 'unloading', 0: 'loading'}) - _payload['weightInKg'] = np.abs(_payload['weightInKg']) + # _payload_plans['requestType'] = _payload_plans['requestType'].map({1: 'unloading', 0: 'loading'}) + _payload_plans.loc[_payload_plans['weightInKg'] < 0, 'requestType'] = 'unloading' + _payload_plans.loc[_payload_plans['weightInKg'] >= 0, 'requestType'] = 'loading' + _payload_plans['weightInKg'] = np.abs(_payload_plans['weightInKg']) + + # Clean up unnecessary columns + payload_plans_drop = ['truck_mode', 'weightInlb', 'cummulativeWeightInlb', 'index'] + _payload_plans.drop(payload_plans_drop, axis=1, inplace=True, errors='ignore') + + return _payload_plans + + +## ################################ +## Snapping coordinates section + +def create_spatial_index_kdtree(edges_gdf_utm: gpd.GeoDataFrame) -> Tuple[np.ndarray, cKDTree]: + """Create KD-tree spatial index from UTM coordinates for faster nearest neighbor queries""" + # Extract centroids of line segments in UTM coordinates + centroids = 
np.array([[geom.centroid.x, geom.centroid.y] for geom in edges_gdf_utm.geometry]) + return centroids, cKDTree(centroids) + + +def find_nearest_edge_kdtree( + point_utm: Point, + edges_gdf_utm: gpd.GeoDataFrame, + kdtree: cKDTree, + k: int = 5 +) -> Tuple[float, gpd.GeoSeries]: + """ + Find nearest edge using KD-tree with vectorized distance calculations in UTM coordinates + + Args: + point_utm: Point geometry in UTM coordinates + edges_gdf_utm: GeoDataFrame containing network edges in UTM + kdtree: cKDTree spatial index + k: Number of nearest neighbors to check + + Returns: + Tuple of (minimum distance in meters, nearest edge) + """ + # Find k nearest neighbors using KD-tree + distances, indices = kdtree.query([point_utm.x, point_utm.y], k=k) + + # Calculate actual distances to the k nearest edges in meters (UTM) + candidate_edges = edges_gdf_utm.iloc[indices] + actual_distances = candidate_edges.geometry.distance(point_utm) + + min_idx = actual_distances.idxmin() + return actual_distances.min(), edges_gdf_utm.loc[min_idx] + + +def generate_random_point_near_line_utm( + nearest_edge_utm: gpd.GeoSeries, + point_utm: Point, + max_dist_meters: float +) -> Tuple[float, float]: + """ + Generate a random point within max_dist_meters of the nearest point on the road in UTM coordinates + """ + # Find nearest point on the road + proj_distance = nearest_edge_utm.geometry.project(point_utm) + nearest_point = nearest_edge_utm.geometry.interpolate(proj_distance) + + # Generate random angle and distance + angle = np.random.uniform(0, 2 * np.pi) + distance = np.random.uniform(0, max_dist_meters) + + # Convert to x,y offset (in meters since we're in UTM) + dx = distance * np.cos(angle) + dy = distance * np.sin(angle) + + # Create new UTM coordinates + new_x_utm = nearest_point.x + dx + new_y_utm = nearest_point.y + dy + + return new_x_utm, new_y_utm + + +def process_points_chunk_vectorized( + points_chunk: np.ndarray, + edges_gdf_utm: gpd.GeoDataFrame, + kdtree: cKDTree, + 
min_distance: float, + max_distance: float, + chunk_start_idx: int, + coordinate_lookup: dict +) -> List[Tuple[int, float, float, bool, bool]]: + """ + Process a chunk of points using vectorized operations with proper CRS handling + + Args: + points_chunk: Array of coordinate pairs to process + edges_gdf_utm: GeoDataFrame containing network edges in UTM + kdtree: Spatial index for quick nearest neighbor lookups + min_distance: Minimum allowed distance from road + max_distance: Maximum allowed distance from road + chunk_start_idx: Starting index of current chunk + coordinate_lookup: Dictionary storing previously processed coordinates + """ + results = [] + cache_hits = 0 + + # Convert input points to UTM for distance calculations + points_gdf = gpd.GeoDataFrame( + geometry=[Point(x, y) for x, y in points_chunk], + crs=4326 + ).to_crs(UTM_CRS) + + for idx, (point_utm, orig_point) in enumerate(zip(points_gdf.geometry, points_chunk)): + try: + # Check lookup table first + coord_key = (orig_point[0], orig_point[1]) + if coord_key in coordinate_lookup: + cached_result = coordinate_lookup[coord_key] + results.append(( + chunk_start_idx + idx, + cached_result[0], + cached_result[1], + cached_result[2], + cached_result[3] + )) + cache_hits += 1 + continue + + # Find nearest edge using UTM coordinates + min_dist, nearest_edge_utm = find_nearest_edge_kdtree(point_utm, edges_gdf_utm, kdtree) + + is_far = min_dist > max_distance + needs_adjustment = min_dist > min_distance and not is_far + + if needs_adjustment: + # Generate new point in UTM coordinates + new_x_utm, new_y_utm = generate_random_point_near_line_utm( + nearest_edge_utm, + point_utm, + min_distance + ) + + # Convert back to original CRS (WGS84) + point_updated = gpd.GeoDataFrame( + geometry=[Point(new_x_utm, new_y_utm)], + crs=UTM_CRS + ).to_crs(4326).geometry[0] + + result = ( + chunk_start_idx + idx, + point_updated.x, + point_updated.y, + is_far, + True + ) + else: + result = ( + chunk_start_idx + idx, + 
orig_point[0], + orig_point[1], + is_far, + False + ) + + # Store in lookup table + coordinate_lookup[coord_key] = result[1:] + results.append(result) + + except Exception as e: + print(f"Warning: Error processing point {chunk_start_idx + idx}: {str(e)}") + results.append(( + chunk_start_idx + idx, + orig_point[0], + orig_point[1], + False, + False + )) + + if cache_hits > 0: + print(f"Cache hits in chunk: {cache_hits}/{len(points_chunk)}") + return results + + +def snap_coordinates_when_too_far(_df: pd.DataFrame, + osm_edges_utm: gpd.GeoDataFrame, + x_column: str, + y_column: str, + coordinate_lookup: dict = None) -> tuple[pd.DataFrame, dict]: + """ + Optimized version of coordinate snapping using KD-tree spatial indexing and lookup table + + Args: + _df: DataFrame with coordinate columns in WGS84 + osm_edges_utm: GeoDataFrame with network in UTM + x_column: Name of the column containing X coordinates (longitude) + y_column: Name of the column containing Y coordinates (latitude) + coordinate_lookup: Optional existing lookup table to use + + Returns: + Tuple of (DataFrame with snapped coordinates in WGS84, coordinate lookup dictionary) + """ + min_distance_from_edge = BUFFER_DISTANCE_METERS + max_distance_from_edge = MAX_DISTANCE_METERS + + if coordinate_lookup is None: + coordinate_lookup = {} + print("Creating new coordinate lookup table...") + else: + print(f"Using existing lookup table with {len(coordinate_lookup)} entries...") + + print("Creating KD-tree spatial index...") + centroids, kdtree = create_spatial_index_kdtree(osm_edges_utm) + + # Extract coordinates in original CRS (WGS84) + coords = np.column_stack(( + _df[x_column].values, + _df[y_column].values + )) + + # Calculate optimal chunk size based on available CPU cores + num_cores = max(1, mp.cpu_count() - 1) + chunk_size = min(CHUNK_SIZE, max(1000, len(coords) // (num_cores * 2))) + n_chunks = (len(coords) + chunk_size - 1) // chunk_size + + print(f"Processing {len(coords)} points in {n_chunks} 
chunks using {num_cores} cores...") + + all_results = [] + far_points = 0 + total_adjusted = 0 + + # Process chunks in parallel using ThreadPoolExecutor + with ThreadPoolExecutor(max_workers=num_cores) as executor: + futures = [] + + for chunk_idx in range(n_chunks): + start_idx = chunk_idx * chunk_size + end_idx = min((chunk_idx + 1) * chunk_size, len(coords)) + chunk_coords = coords[start_idx:end_idx] + + future = executor.submit( + process_points_chunk_vectorized, + chunk_coords, + osm_edges_utm, + kdtree, + min_distance_from_edge, + max_distance_from_edge, + start_idx, + coordinate_lookup + ) + futures.append(future) + + # Collect results as they complete + for future in as_completed(futures): + try: + results = future.result() + for _, _, _, is_far, is_adjusted in results: + if is_far: + far_points += 1 + if is_adjusted: + total_adjusted += 1 + all_results.extend(results) + except Exception as e: + print(f"Error processing chunk: {str(e)}") + + if far_points > 0: + print(f"Warning: {far_points} points are farther than {int(max_distance_from_edge / 1000)} km from any road") + if total_adjusted > 0: + print(f"Adjusted {total_adjusted} points to be within {int(min_distance_from_edge / 1000)} km of nearest road") + + # Sort results and update DataFrame efficiently + all_results.sort(key=lambda r: r[0]) + result_indices = [r[0] for r in all_results] + x_coords = [r[1] for r in all_results] + y_coords = [r[2] for r in all_results] + + result_df = _df.copy() + result_df[x_column] = pd.Series(x_coords, index=result_indices) + result_df[y_column] = pd.Series(y_coords, index=result_indices) + + return result_df, coordinate_lookup + + + +############################# +## MAIN + +if __name__ == '__main__': + # Add these at the beginning of your main code, after the variables section + # Dictionary to store vehicle class and fuel rate mappings + vehicle_class_fuel_rates = {} + + for filename in sorted(os.listdir(DIRECTORY_INPUT)): + filepath = 
f'{DIRECTORY_INPUT}/{filename}' + print(filepath) + parts = filename.split('_', 2) + if len(parts) < 3: + print("Warning! could not read file: ", filename) + continue + business_type = parts[0].lower() + county = parts[1].lower() + filetype = parts[2].lower() + + if "carrier" in filetype: + df = pd.read_csv(filepath) + # df['carrierId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'carrierId', row), axis=1) + # df['vehicleId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'vehicleId', row), axis=1) + df['carrierId'] = df.apply(lambda row: add_prefix(f'', 'carrierId', row, False), axis=1).tolist() + df['vehicleTypeId'] = df.apply( + lambda row: add_prefix('', 'vehicleTypeId', row, to_num=True, store_dict=None, veh_type=True, + suffix=f"-{YEAR}-{SCENARIO_LABEL}"), + axis=1).tolist() + df['vehicleId'] = df.apply(lambda row: add_prefix(row['carrierId'] + '-', 'vehicleId', row), + axis=1).tolist() + # df['tourId'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'tourId', row), axis=1) + df['tourId'] = df.apply( + lambda row: add_prefix(f'{business_type}-', 'tourId', row, True, _tourId_with_prefix), + axis=1).tolist() + if _carriers is None: + _carriers = df + else: + _carriers = pd.concat([_carriers, df]) + elif "freight_tours" in filetype: + df = pd.read_csv(filepath) + # df['tour_id'] = df.apply(lambda row: add_prefix(f'{business_type}-{county}-', 'tour_id', row), axis=1) + df['tour_id'] = df.apply(lambda row: _tourId_with_prefix[str(int(row['tour_id']))], axis=1).tolist() + if _tours is None: + _tours = df + else: + _tours = pd.concat([_tours, df]) + elif "payload" in filetype: + df = pd.read_csv(filepath) + if "ondemand" in county: + df['tourId'] = df.apply(lambda row: add_prefix(f'ridehail-', 'tourId', row), axis=1) + if _ondemand_plans is None: + _ondemand_plans = df + else: + _ondemand_plans = pd.concat([_ondemand_plans, df]) + else: + df['tourId'] = df.apply(lambda row: 
_tourId_with_prefix[str(int(row['tourId']))], axis=1).tolist() + df['payloadId'] = df.apply(lambda row: add_prefix('', 'payloadId', row, False), axis=1).tolist() + _tourId_with_prefix = {} + if _payload_plans is None: + _payload_plans = df + else: + _payload_plans = pd.concat([_payload_plans, df]) + elif "vehicle_types" in filename: # Modify the "vehicle_types" section in the main loop + df = pd.read_csv(filepath) + + # First pass: collect vehicle class and fuel rate information for non-PHEV vehicles + for _, row in df.iterrows(): + veh_class = row['veh_class'] + fuel_type = row['primary_fuel_type'] + fuel_rate = row['primary_fuel_rate'] + + if 'PHEV' not in str(row['veh_type_id']): + vehicle_class_fuel_rates[f"{veh_class}-{fuel_type}"] = fuel_rate + + # Process all vehicles, handling PHEVs specially + empty_vectors = list(np.repeat("", len(df.index))) + vehicle_types_ids = [] + original_vehicle_types_ids = [] + primary_fuel_types = [] + primary_fuel_consumption = [] + primary_fuel_capacities = [] + secondary_fuel_types = [] + secondary_fuel_consumption = [] + secondary_fuel_capacities = [] + + for _, row in df.iterrows(): + veh_type_id = add_prefix('', 'veh_type_id', row, to_num=True, + store_dict=None, veh_type=True, suffix=f"-{YEAR}-{SCENARIO_LABEL}") + vehicle_types_ids.append(veh_type_id) + + original_veh_type_id = add_prefix('', 'veh_type_id', row, to_num=True, + store_dict=None, veh_type=True, suffix="") + original_vehicle_types_ids.append(original_veh_type_id) + + veh_class = row['veh_class'] + fuel_type = row['primary_fuel_type'] + + # Check if this is a PHEV vehicle + if 'PHEV' in str(row['veh_type_id']): + if f"{veh_class}-Electricity" in vehicle_class_fuel_rates and f"{veh_class}-{fuel_type}" in vehicle_class_fuel_rates: + # Primary + primary_fuel_types.append('Electricity') + fuel_rate_1 = vehicle_class_fuel_rates[f"{veh_class}-Electricity"] + primary_fuel_consumption.append(constants["joule_per_meter_base_rate"] / (float(fuel_rate_1) * 1609.34)) + 
primary_fuel_capacities.append(12000000000000000 * 0.25) # 25% of standard capacity + + # Secondary + secondary_fuel_types.append(fuel_type) + fuel_rate_2 = vehicle_class_fuel_rates[f"{veh_class}-{fuel_type}"] + secondary_fuel_consumption.append(constants["joule_per_meter_base_rate"] / (float(fuel_rate_2) * 1609.34)) + secondary_fuel_capacities.append(12000000000000000 * 0.75) # 75% of standard capacity + else: + # For non-PHEV vehicles, use standard processing + primary_fuel_types.append(row["primary_fuel_type"]) + primary_fuel_consumption.append(constants["joule_per_meter_base_rate"] / + (np.float64(row["primary_fuel_rate"]) * 1609.34)) + primary_fuel_capacities.append(12000000000000000) + secondary_fuel_types.append(np.nan) + secondary_fuel_consumption.append(np.nan) + secondary_fuel_capacities.append(np.nan) + + # Create the vehicles techs dictionary with our processed values + vehicles_techs = { + "vehicleTypeId": vehicle_types_ids, + "seatingCapacity": list(np.repeat(1, len(df.index))), + "standingRoomCapacity": list(np.repeat(0, len(df.index))), + "lengthInMeter": list(np.repeat(12, len(df.index))), + "primaryFuelType": primary_fuel_types, + "primaryFuelConsumptionInJoulePerMeter": primary_fuel_consumption, + "primaryFuelCapacityInJoule": primary_fuel_capacities, + "primaryVehicleEnergyFile": [ + PRIMARY_ENERGY_PROFILE[index] if index in PRIMARY_ENERGY_PROFILE else np.nan + for index in original_vehicle_types_ids], + "secondaryFuelType": secondary_fuel_types, + "secondaryFuelConsumptionInJoulePerMeter": secondary_fuel_consumption, + "secondaryFuelCapacityInJoule": secondary_fuel_capacities, + "secondaryVehicleEnergyFile": [ + SECONDARY_ENERGY_PROFILE[index][3] if index in SECONDARY_ENERGY_PROFILE else np.nan for + index in original_vehicle_types_ids], + "automationLevel": list(np.repeat(1, len(df.index))), + "maxVelocity": df["max_speed(mph)"], + "passengerCarUnit": empty_vectors, + "rechargeLevel2RateLimitInWatts": empty_vectors, + 
"rechargeLevel3RateLimitInWatts": empty_vectors, + "vehicleCategory": list(np.repeat("Class456Vocational", len(df.index))), + "sampleProbabilityWithinCategory": empty_vectors, + "sampleProbabilityString": empty_vectors, + "payloadCapacityInKg": df["payload_capacity_weight"], + "vehicleClass": df["veh_class"] + } + + df2 = pd.DataFrame(vehicles_techs) + df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('hdv'), 'Class78Vocational', + df2.vehicleCategory) + df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('hdt'), 'Class78Tractor', + df2.vehicleCategory) + df2["vehicleCategory"] = np.where(df2["vehicleTypeId"].str.contains('ld'), 'Class2b3Vocational', + df2.vehicleCategory) + + if _vehicle_types is None: + _vehicle_types = df2 + else: + _vehicle_types = pd.concat([_vehicle_types, df2]) + else: + print(f'SKIPPING {filename}') - _payload.drop(payload_plans_drop, axis=1, inplace=True, errors='ignore') - return _payload + _vehicle_types.to_csv( + f'{DIRECTORY_VEHICLE_TECH}/ft-vehicletypes--{BATCH_NAME.replace("-", "")}--{YEAR}-{SCENARIO_LABEL}.csv', + index=False) -# Save the modified DataFrame -format_payload(payload_plans).to_csv(f'{directory_output}/payloads--{year}-{run_name_label}.csv', index=False) + # Load OSM network and create buffer + _osm_edges_utm = load_osm_network( + NETWORK_OSM_PBF, + min_distance_from_edge=BUFFER_DISTANCE_METERS + ) -if ondemand_plans is not None: - format_payload(ondemand_plans).to_csv(f'{directory_output}/ondemand--{year}-{run_name_label}.csv', index=False) + _coordinate_lookup = {} -print("END") + # carrierId,tourId,vehicleId,vehicleTypeId,warehouseZone,warehouseX,warehouseY,MESOZONE,BoundaryZONE + carriers_renames = { + 'depot_zone': 'warehouseZone', + 'depot_zone_x': 'warehouseX', + 'depot_zone_y': 'warehouseY', + 'true_depot_zone': 'mesoZone' + } + carriers_drop = ['x', 'y', 'index'] + _carriers.rename(columns=carriers_renames, inplace=True) + _carriers.drop(carriers_drop, axis=1, inplace=True, 
errors='ignore') + _carriers['warehouseZone'] = _carriers['warehouseZone'].astype(int) + if SNAP_COORDINATES: + _carriers, _coordinate_lookup = snap_coordinates_when_too_far( + _carriers, + _osm_edges_utm, + "warehouseX", + "warehouseY", + _coordinate_lookup + ) + # Write + _carriers.to_csv(f'{DIRECTORY_SCENARIO}/carriers--{YEAR}-{SCENARIO_LABEL}.csv', index=False) + + # tourId,departureTimeInSec,departureLocationZone,maxTourDurationInSec,departureLocationX,departureLocationY + tours_renames = { + 'tour_id': 'tourId', + 'departureLocation_zone': 'departureLocationZone', + 'departureLocation_x': 'departureLocationX', + 'departureLocation_y': 'departureLocationY', + 'true_depot_zone': 'mesoZone' + } + _tours.rename(columns=tours_renames, inplace=True) + _tours['departureTimeInSec'] = _tours['departureTimeInSec'].astype(int) + _tours['maxTourDurationInSec'] = _tours['maxTourDurationInSec'].astype(int) + _tours['departureLocationZone'] = _tours['departureLocationZone'].astype(int) + _tours.drop(['index'], axis=1, inplace=True, errors='ignore') + if SNAP_COORDINATES: + _tours, _coordinate_lookup = snap_coordinates_when_too_far( + _tours, + _osm_edges_utm, + "departureLocationX", + "departureLocationY", + _coordinate_lookup + ) + # Write + _tours.to_csv(f'{DIRECTORY_SCENARIO}/tours--{YEAR}-{SCENARIO_LABEL}.csv', index=False) + + # Process payloads + print("Processing payload plans...") + # Add random_state for reproducibility + # sampled_df = _payload_plans.sample(n=1000, random_state=42).copy().reset_index(drop=True) + # sampled_df.to_csv(f'{DIRECTORY_OUTPUT}/payloads-sampled--{YEAR}-{SCENARIO_LABEL}.csv', index=False) + # Then format and save + # Create shared coordinate lookup table + _payload_plans = format_payload(_payload_plans) + if SNAP_COORDINATES: + # Snap coordinates and save + _payload_plans, _coordinate_lookup = snap_coordinates_when_too_far( + _payload_plans, + _osm_edges_utm, + "locationX", + "locationY", + _coordinate_lookup + ) + 
_payload_plans["operationDurationInSecOG"] = _payload_plans["operationDurationInSec"] + _payload_plans = update_operation_duration(STUDY_AREA_CONFIG, _payload_plans, _tours, _carriers, _vehicle_types) + _payload_plans.to_csv(f'{DIRECTORY_SCENARIO}/payloads--{YEAR}-{SCENARIO_LABEL}.csv', index=False) + + if _ondemand_plans is not None: + print("Processing ondemand plans...") + _ondemand_plans = format_payload(_ondemand_plans) + if SNAP_COORDINATES: + # Snap coordinates and save, reusing the lookup table + _ondemand_plans, _coordinate_lookup = snap_coordinates_when_too_far( + _ondemand_plans, + _osm_edges_utm, + "locationX", + "locationY", + _coordinate_lookup + ) + _ondemand_plans.to_csv(f'{DIRECTORY_SCENARIO}/ondemand--{YEAR}-{SCENARIO_LABEL}.csv', index=False) + + # Create combined plans file with both regular plans and ondemand plans + if _payload_plans is not None: + print("Creating combined plans file of payloads and crowdshipments...") + # Save the combined file + pd.concat([_payload_plans, _ondemand_plans], ignore_index=True).to_csv( + f'{DIRECTORY_SCENARIO}/payloads+crowdshipments--{YEAR}-{SCENARIO_LABEL}.csv', index=False + ) diff --git a/src/main/python/freight/process_frism_plans.py b/src/main/python/freight/process_frism_plans.py new file mode 100644 index 00000000000..1f9b6ab7489 --- /dev/null +++ b/src/main/python/freight/process_frism_plans.py @@ -0,0 +1,1189 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from scipy import stats +import seaborn as sns + + +# Load the CSV file +import matplotlib.pyplot as plt + +def generate_duration_histogram(durations_minutes, label, output_file, bins=50, log_scale=False): + """ + Generate a histogram of operation durations. 
def plot_duration_comparison(df, duration_col1, duration_col2, group_col=None, output_file='duration_comparison.png'):
    """
    Create a scatter plot comparing two sets of durations,
    with optional grouping by a categorical variable.

    Parameters:
    -----------
    df : pandas.DataFrame
        The DataFrame containing the data
    duration_col1 : str
        Column name for the first duration in minutes (x axis)
    duration_col2 : str
        Column name for the second duration in minutes (y axis)
    group_col : str, optional
        Column name for grouping variable (categorical). Ignored when the
        column is not present in ``df``.
    output_file : str
        Path to save the output plot

    Returns:
    --------
    dict or None
        ``{'correlation': r}`` with the overall Pearson correlation of the two
        columns (NaN when fewer than two complete rows remain), or None when
        no row has both durations.
    """
    # Ensure clean data by removing NaN values
    plot_df = df.dropna(subset=[duration_col1, duration_col2]).copy()

    if plot_df.empty:
        print(f"No valid data points found with non-null values in both {duration_col1} and {duration_col2}")
        return None

    durations1 = plot_df[duration_col1]
    durations2 = plot_df[duration_col2]
    grouped = group_col is not None and group_col in plot_df.columns

    # Create a figure
    plt.figure(figsize=(12, 8))

    if grouped:
        # Create a grouped scatter plot with different colors
        groups = plot_df[group_col].unique()

        # Create a colormap with distinct colors
        colors = plt.cm.tab10(np.linspace(0, 1, len(groups)))

        # Plot each group separately
        for i, group in enumerate(groups):
            group_data = plot_df[plot_df[group_col] == group]
            group_x = group_data[duration_col1]
            group_y = group_data[duration_col2]
            plt.scatter(group_x, group_y,
                        alpha=0.6, color=colors[i], edgecolor='none',
                        label=f'{group}')

            # A per-group fit needs more than two points and some spread in x;
            # with constant x the slope is undefined and the correlation is NaN.
            if len(group_data) > 2 and group_x.nunique() > 1:
                group_corr, _ = stats.pearsonr(group_x, group_y)

                # Calculate and plot best fit line for this group
                group_slope, group_intercept = np.polyfit(group_x, group_y, 1)
                x_line = np.array([group_x.min(), group_x.max()])
                y_line = group_slope * x_line + group_intercept
                plt.plot(x_line, y_line, color=colors[i], linewidth=2,
                         linestyle='--')

                # Add correlation text near the group in the plot
                plt.annotate(f'r = {group_corr:.2f}',
                             xy=(group_x.median(), group_y.median()),
                             xytext=(10, 0), textcoords='offset points',
                             fontsize=9, color=colors[i])

        plt.title(f'Comparison of Two Durations by {group_col}', fontsize=14)
        # The scatters above are labelled per group; without a legend the
        # colours cannot be read.
        plt.legend(title=group_col)
    else:
        # Generate simple scatter plot if no grouping
        plt.scatter(durations1, durations2, alpha=0.5, color='steelblue', edgecolor='none')
        plt.title('Comparison of Two Durations', fontsize=14)

    # Add labels and grid
    plt.xlabel(f'{duration_col1} (minutes)', fontsize=12)
    plt.ylabel(f'{duration_col2} (minutes)', fontsize=12)
    plt.grid(linestyle='--', alpha=0.7)

    # Save the plot to a file
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()

    # pearsonr rejects inputs shorter than two samples; report NaN instead of
    # failing after the figure has already been written.
    if len(plot_df) < 2:
        overall_corr = float('nan')
    else:
        overall_corr, _ = stats.pearsonr(durations1, durations2)
    return {'correlation': overall_corr}
+ + Parameters: + ----------- + df : pandas.DataFrame + The DataFrame containing the data + duration_col : str + Column name for operation duration in minutes + weight_col : str + Column name for weight in pounds + group_col : str, optional + Column name for grouping variable (categorical) + output_file : str + Path to save the output plot + + Returns: + -------- + dict + Dictionary containing correlation statistics + """ + # Ensure clean data by removing NaN values + plot_df = df.dropna(subset=[duration_col, weight_col]).copy() + + if plot_df.empty: + print(f"No valid data points found with non-null values in both {duration_col} and {weight_col}") + return None + + # Use durations in minutes directly if already in minutes + durations_minutes = plot_df[duration_col] + weights_lbs = plot_df[weight_col] + + # Create a figure + plt.figure(figsize=(12, 8)) + + if group_col is not None and group_col in plot_df.columns: + # Create a grouped scatter plot with different colors + groups = plot_df[group_col].unique() + + # Create a colormap with distinct colors + colors = plt.cm.tab10(np.linspace(0, 1, len(groups))) + + # Plot each group separately + for i, group in enumerate(groups): + group_data = plot_df[plot_df[group_col] == group] + plt.scatter(group_data[duration_col], group_data[weight_col], + alpha=0.6, color=colors[i], edgecolor='none', + label=f'{group}') + + # Calculate correlation for this group if enough data points + if len(group_data) > 2: + group_corr, _ = stats.pearsonr(group_data[duration_col], group_data[weight_col]) + + # Calculate and plot best fit line for this group + group_slope, group_intercept = np.polyfit(group_data[duration_col], group_data[weight_col], 1) + x_line = np.array([group_data[duration_col].min(), group_data[duration_col].max()]) + y_line = group_slope * x_line + group_intercept + plt.plot(x_line, y_line, color=colors[i], linewidth=2, + linestyle='--') + + # Add correlation text near the group in the plot + plt.annotate(f'r = 
{group_corr:.2f}', + xy=(group_data[duration_col].median(), group_data[weight_col].median()), + xytext=(10, 0), textcoords='offset points', + fontsize=9, color=colors[i]) + + plt.title(f'Relationship Between Operation Duration and Weight by {group_col}', fontsize=14) + else: + # Generate simple scatter plot if no grouping + plt.scatter(durations_minutes, weights_lbs, alpha=0.5, color='steelblue', edgecolor='none') + plt.title('Relationship Between Operation Duration and Weight', fontsize=14) + + # Add labels and grid + plt.xlabel('Operation Duration (minutes)', fontsize=12) + plt.ylabel('Weight (lbs)', fontsize=12) + plt.grid(linestyle='--', alpha=0.7) + + # Calculate overall correlation coefficient and p-value + corr, p_value = stats.pearsonr(durations_minutes, weights_lbs) + + # Calculate and plot overall best fit line + slope, intercept = np.polyfit(durations_minutes, weights_lbs, 1) + x_line = np.array([durations_minutes.min(), durations_minutes.max()]) + y_line = slope * x_line + intercept + + if group_col is None: + # Only show the overall trend line when not grouping + plt.plot(x_line, y_line, color='red', linewidth=2, + label=f'y = {slope:.2f}x + {intercept:.2f}') + + # Add text with statistics + stats_text = (f'Overall Statistics:\n' + f'Pearson Correlation: {corr:.4f}\n' + f'P-value: {p_value:.4e}\n' + f'Slope: {slope:.4f}\n' + f'Intercept: {intercept:.2f}\n' + f'N = {len(durations_minutes)}') + + plt.annotate(stats_text, xy=(0.05, 0.95), xycoords='axes fraction', + fontsize=10, ha='left', va='top', + bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.7)) + + # Add legend if groups are present + if group_col is not None and group_col in plot_df.columns: + plt.legend(title=group_col) + else: + plt.legend() + + # Save the plot + plt.tight_layout() + plt.savefig(output_file) + plt.close() + + print(f"Scatter plot saved as '{output_file}'") + + # Return correlation statistics + return { + 'correlation': corr, + 'p_value': p_value, + 'slope': 
def _austin_cargo_vehicles(rec_20):
    """Reduce sheet Rec_20 to cargo vehicles with readable fuel type and vehicle class.

    Returns None (after printing a message) when the vehicle type column is missing.
    """
    veh_type_col = '23. Veh Type 1=Cargo, 2=Service, 3=Service with some Cargo'
    if veh_type_col not in rec_20.columns:
        print("Vehicle type column not found in Rec_20")
        return None

    # Filter Rec_20 to keep only cargo vehicles (type=1)
    vehicles = rec_20[rec_20[veh_type_col] == 1].copy()
    print(f"Filtered Rec_20 to {len(vehicles)} cargo vehicles")

    # Keep only required columns from Rec_20
    vehicle_columns = [
        '3. Vehicle ID Number',
        '22. Year of Vehicle',
        '25. Vehicle Fuel Type',
        '27. Vehicle Class.'
    ]

    # Check if all required columns exist
    missing_cols = [col for col in vehicle_columns if col not in vehicles.columns]
    if missing_cols:
        print(f"Warning: Missing columns in Rec_20: {missing_cols}")
        vehicle_columns = [col for col in vehicle_columns if col in vehicles.columns]

    vehicles = vehicles[vehicle_columns].copy()

    # Survey code -> fuel type
    fuel_type_map = {
        1: 'Gasoline',
        2: 'Diesel',
        3: 'Propane',
        4: 'Natural Gas',
        5: 'Electricity',
        6: 'Gas/Electric Hybrid',
        96: 'Other',
        98: 'Don\'t Know',
        99: 'Refused'
    }

    # Survey code -> vehicle class description
    vehicle_class_map = {
        1: 'Passenger Car',
        2: 'Pick-up',
        3: 'Van (Cargo or Mini)',
        4: 'Sport Utility Vehicle (SUV)',
        5: 'Single Unit 2-axle (6 wheels)',
        6: 'Single Unit 3-axle (10 wheels)',
        7: 'Single Unit 4-axle (14 wheels)',
        8: 'Semi (all Tractor-Trailer Combinations)',
        96: 'Other'
    }

    # Vehicle class description -> vehicle category label; descriptions that
    # are not listed map to NaN and are dropped below.
    vehicle_class_mapping = {
        "Semi (all Tractor-Trailer Combinations)": "Class78Tractor",
        "Single Unit 2-axle (6 wheels)": "Class456Vocational",
        "Single Unit 3-axle (10 wheels)": "Class456Vocational",
        "Single Unit 4-axle (14 wheels)": "Class78Vocational"
    }

    # Apply mappings if columns exist
    if '25. Vehicle Fuel Type' in vehicles.columns:
        vehicles['vehicleFuelType'] = vehicles['25. Vehicle Fuel Type'].map(fuel_type_map)

    if '27. Vehicle Class.' in vehicles.columns:
        vehicles['vehicleClass'] = vehicles['27. Vehicle Class.'].map(vehicle_class_map).map(vehicle_class_mapping)
        vehicles.dropna(subset=['vehicleClass'], inplace=True)

    # Rename columns for clarity
    vehicles.rename(columns={
        '3. Vehicle ID Number': 'vehicleId',
        '22. Year of Vehicle': 'vehicleModelYear',
    }, inplace=True)

    # Drop original columns that have been mapped
    vehicles.drop(columns=['25. Vehicle Fuel Type', '27. Vehicle Class.'], errors='ignore', inplace=True)
    return vehicles


def _austin_cargo_stops(rec_21):
    """Reduce sheet Rec_21 to cargo stops and derive their duration in minutes.

    Returns None (after printing a message) when the activity column or any of
    the four arrival/departure time columns is missing.
    """
    activity_col = '8. Type of Activity'
    if activity_col not in rec_21.columns:
        print("Column '8. Type of Activity' not found in dataset")
        return None

    # Activity codes kept, with their human-readable label
    activity_map = {
        4: 'Delivery of Cargo',
        5: 'Pick up Cargo',
        6: 'Deliver and Pick up Cargo'
    }

    stops = rec_21[rec_21[activity_col].isin(list(activity_map))].copy()
    print(f"Filtered to {len(stops)} records with cargo activities (types 4, 5, 6)")
    stops['activityType'] = stops[activity_col].map(activity_map)

    # All four clock columns are read below, so all four must be present.
    time_columns = ['28. Arrival Hour', '29. Arrival Minute', '30. Departure Hour', '31. Departure Minute']
    if any(col not in stops.columns for col in time_columns):
        print("Could not find minute-based time columns")
        return None

    print("Calculating operation duration from minute-based time columns")
    arrival_min = stops['28. Arrival Hour'] * 60 + stops['29. Arrival Minute']
    departure_min = stops['30. Departure Hour'] * 60 + stops['31. Departure Minute']
    duration = (departure_min - arrival_min).astype(float)

    # A departure clock time earlier than the arrival means the stop ran past
    # midnight, so one day is added. Rows with a blank arrival or departure
    # (first / last trip of a vehicle) stay NaN because NaN < 0 is False.
    stops['operationDurationInMin'] = duration.where(~(duration < 0), duration + 24 * 60)

    # Extract cargo weights
    if '25. Cargo Weight PU' in stops.columns:
        stops['cargoWeightPU'] = stops['25. Cargo Weight PU']
    else:
        print("Column '25. Cargo Weight PU' not found, adding empty column")
        stops['cargoWeightPU'] = np.nan

    if '26. Cargo Weight DO' in stops.columns:
        stops['cargoWeightDO'] = stops['26. Cargo Weight DO']
    else:
        print("Column '26. Cargo Weight DO' not found, adding empty column")
        stops['cargoWeightDO'] = np.nan

    # Rename Vehicle ID for merging
    if '3. Vehicle ID Number' in stops.columns:
        stops.rename(columns={'3. Vehicle ID Number': 'vehicleId'}, inplace=True)

    return stops


def load_and_process_austin_data(trip_data_path, output_dir):
    """
    Load and process the Austin Commercial Vehicle Survey data
    with specific handling for the observed structure

    Parameters:
    -----------
    trip_data_path : str
        Path of the Excel workbook; sheets 'Rec_20' (vehicles) and 'Rec_21'
        (trip records) are read from it
    output_dir : str
        Directory that receives 'austin_cargo_operations.csv'; created if missing

    Returns:
    --------
    pandas.DataFrame or None
        Cargo stops inner-joined with their cargo vehicle on 'vehicleId', or
        None when a required column is missing from either sheet
    """
    print("Loading Austin data files...")

    # Load both sheets from the trip data file
    rec_20 = pd.read_excel(trip_data_path, sheet_name='Rec_20')
    rec_21 = pd.read_excel(trip_data_path, sheet_name='Rec_21')

    print(f"Loaded sheet Rec_20 with {len(rec_20)} records")
    print(f"Loaded sheet Rec_21 with {len(rec_21)} records")

    rec_20_filtered = _austin_cargo_vehicles(rec_20)
    if rec_20_filtered is None:
        return None

    rec_21_filtered = _austin_cargo_stops(rec_21)
    if rec_21_filtered is None:
        return None

    # Merge rec_21_filtered with rec_20_filtered
    if 'vehicleId' in rec_21_filtered.columns and 'vehicleId' in rec_20_filtered.columns:
        print("Merging trip data with vehicle information")
        merged_data = pd.merge(
            rec_21_filtered,
            rec_20_filtered,
            on='vehicleId',
            how='inner'  # Keep only records that exist in both datasets
        )
        print(f"Merged data has {len(merged_data)} records")
    else:
        print("Vehicle ID column not found for merging")
        return None

    # Create output CSV with only the required columns
    output_columns = [
        'vehicleId',
        'vehicleModelYear',
        'vehicleFuelType',
        'vehicleClass',
        'operationDurationInMin',
        'activityType',
        'cargoWeightPU',
        'cargoWeightDO'
    ]

    # Check that all output columns exist
    missing_cols = [col for col in output_columns if col not in merged_data.columns]
    if missing_cols:
        print(f"Warning: Missing columns in merged data: {missing_cols}")
        output_columns = [col for col in output_columns if col in merged_data.columns]

    output_df = merged_data[output_columns]

    # Save to CSV
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, 'austin_cargo_operations.csv')
    output_df.to_csv(csv_path, index=False)
    print(f"CSV file saved to {csv_path}")

    return merged_data
def analyze_austin_operation_durations(data, output_dir='.', duration_col='stop_duration',
                                       group_cols=('activity_type', 'place_type')):
    """
    Analyze operation durations from the Austin CV Survey data

    Writes 'austin_duration_histogram.png' into ``output_dir`` and, for every
    column of ``group_cols`` present in ``data``, runs
    ``analyze_durations_by_key`` with the prefix 'austin'.

    Parameters:
    -----------
    data : pandas.DataFrame
        DataFrame containing the duration column
    output_dir : str
        Directory to save output files (created if missing)
    duration_col : str, optional
        Column holding the durations in minutes (default 'stop_duration')
    group_cols : iterable of str, optional
        Columns to break the durations down by when they are present
        (default: 'activity_type' and 'place_type')

    Returns:
    --------
    dict or None
        Mean, median, min and max of the durations, or None when
        ``duration_col`` is not a column of ``data``
    """
    print("Analyzing Austin operation durations...")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Check if we have the necessary columns
    if duration_col not in data.columns:
        print(f"Cannot analyze durations - {duration_col} column not found")
        return None

    durations = data[duration_col]
    mean_duration = durations.mean()
    median_duration = durations.median()

    # Generate histogram of stop durations
    plt.figure(figsize=(12, 6))
    plt.hist(durations, bins=50, alpha=0.75, color='steelblue', edgecolor='black')
    plt.title('Distribution of Austin CV Survey Operation Durations', fontsize=14)
    plt.xlabel('Operation Duration (minutes)', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Display mean and median in the top-right corner of the axes
    stats_text = f'Mean: {mean_duration:.2f} minutes\nMedian: {median_duration:.2f} minutes'
    plt.annotate(stats_text, xy=(0.95, 0.95), xycoords='axes fraction',
                 fontsize=10, ha='right', va='top',
                 bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.7))

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'austin_duration_histogram.png'))
    plt.close()

    # Break the durations down by each grouping column that is available
    for group_col in group_cols:
        if group_col in data.columns:
            analyze_durations_by_key(
                data,
                group_col,
                duration_col,
                output_dir=output_dir,
                prefix='austin'
            )

    # Return statistics
    return {
        'mean': mean_duration,
        'median': median_duration,
        'min': durations.min(),
        'max': durations.max()
    }
df['operationDurationInMin'] = pd.to_numeric(df['operationDurationInMin'], errors='coerce') + print("Converted operationDurationInMin to numeric") + + # Handle missing values in cargo weights + if 'cargoWeightPU' in df.columns and df['cargoWeightPU'].isnull().sum() > 0: + print(f"Missing values in cargoWeightPU: {df['cargoWeightPU'].isnull().sum()}") + # For analysis purposes, we'll separate pickup and delivery operations + pickup_ops = df[df['activityType'] == 'Pick up Cargo'].copy() + delivery_ops = df[df['activityType'] == 'Delivery of Cargo'].copy() + print(f"Pickup operations: {pickup_ops.shape[0]}, Delivery operations: {delivery_ops.shape[0]}") + + # Remove extreme outliers (if needed) + q1 = df['operationDurationInMin'].quantile(0.01) + q3 = df['operationDurationInMin'].quantile(0.99) + iqr = q3 - q1 + + print(f"Duration statistics before outlier treatment:") + print(f"Min: {df['operationDurationInMin'].min()}, Max: {df['operationDurationInMin'].max()}") + print(f"Mean: {df['operationDurationInMin'].mean():.2f}, Median: {df['operationDurationInMin'].median():.2f}") + print(f"1% percentile: {q1}, 99% percentile: {q3}") + + # Create a copy for analysis without extreme outliers + df_no_outliers = df[(df['operationDurationInMin'] >= q1 - 1.5 * iqr) & + (df['operationDurationInMin'] <= q3 + 1.5 * iqr)].copy() + + print(f"\nRemoved {df.shape[0] - df_no_outliers.shape[0]} extreme outliers") + print(f"Duration statistics after outlier treatment:") + print( + f"Min: {df_no_outliers['operationDurationInMin'].min()}, Max: {df_no_outliers['operationDurationInMin'].max()}") + print( + f"Mean: {df_no_outliers['operationDurationInMin'].mean():.2f}, Median: {df_no_outliers['operationDurationInMin'].median():.2f}") + + # Create visualizations + print("\n===== CREATING VISUALIZATIONS =====") + + # Set up the plotting environment + plt.style.use('ggplot') + sns.set(font_scale=1.2) + + # 1. 
Distribution of operation durations + plt.figure(figsize=(12, 6)) + + plt.subplot(1, 2, 1) + sns.histplot(df['operationDurationInMin'], kde=True, bins=30) + plt.title('Distribution of Operation Durations') + plt.xlabel('Duration (minutes)') + plt.ylabel('Frequency') + + plt.subplot(1, 2, 2) + sns.histplot(df['operationDurationInMin'], kde=True, log_scale=True, bins=30) + plt.title('Distribution of Operation Durations (Log Scale)') + plt.xlabel('Duration (minutes) - Log Scale') + plt.ylabel('Frequency') + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_distribution.png") + plt.close() + print("Created duration distribution plot") + + # 2. Operation Duration by Vehicle Model Year + plt.figure(figsize=(14, 8)) + + # Calculate average duration by model year + year_duration = df.groupby('vehicleModelYear')['operationDurationInMin'].agg(['mean', 'median', 'count']) + year_duration = year_duration.reset_index() + + # Plot with size representing count + plt.subplot(1, 2, 1) + sns.scatterplot(data=year_duration, x='vehicleModelYear', y='mean', size='count', sizes=(20, 500), alpha=0.7) + plt.title('Average Operation Duration by Vehicle Model Year') + plt.xlabel('Vehicle Model Year') + plt.ylabel('Average Duration (minutes)') + + plt.subplot(1, 2, 2) + sns.boxplot(data=df, x='vehicleModelYear', y='operationDurationInMin') + plt.title('Operation Duration Distribution by Vehicle Model Year') + plt.xlabel('Vehicle Model Year') + plt.ylabel('Duration (minutes)') + plt.xticks(rotation=45) + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_year.png") + plt.close() + print("Created duration by vehicle year plot") + + # 3. 
Operation Duration by Activity Type + if 'activityType' in df.columns: + plt.figure(figsize=(14, 6)) + + plt.subplot(1, 2, 1) + sns.boxplot(data=df, x='activityType', y='operationDurationInMin') + plt.title('Operation Duration by Activity Type') + plt.xlabel('Activity Type') + plt.ylabel('Duration (minutes)') + + plt.subplot(1, 2, 2) + activity_counts = df['activityType'].value_counts() + sns.barplot(x=activity_counts.index, y=activity_counts.values) + plt.title('Count by Activity Type') + plt.xlabel('Activity Type') + plt.ylabel('Count') + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_activity.png") + plt.close() + print("Created duration by activity type plot") + + # 4. Operation Duration by Cargo Weight (for pickup operations) + pickup_ops = df[df['activityType'] == 'Pick up Cargo'].copy() + if 'cargoWeightPU' in df.columns and not pickup_ops.empty: + plt.figure(figsize=(14, 8)) + + # Remove NaN values for this analysis + pickup_with_weight = pickup_ops.dropna(subset=['cargoWeightPU']) + + if not pickup_with_weight.empty: + plt.subplot(1, 2, 1) + sns.scatterplot(data=pickup_with_weight, x='cargoWeightPU', y='operationDurationInMin', alpha=0.5) + plt.title('Operation Duration vs Cargo Weight (Pick Up)') + plt.xlabel('Cargo Weight (Pick Up)') + plt.ylabel('Duration (minutes)') + + plt.subplot(1, 2, 2) + # Create weight bins + pickup_with_weight['weight_bin'] = pd.cut(pickup_with_weight['cargoWeightPU'], bins=10) + sns.boxplot(data=pickup_with_weight, x='weight_bin', y='operationDurationInMin') + plt.title('Operation Duration by Cargo Weight Range (Pick Up)') + plt.xlabel('Cargo Weight Range') + plt.ylabel('Duration (minutes)') + plt.xticks(rotation=90) + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_weight_pickup.png") + plt.close() + print("Created duration by cargo weight plot for pickup operations") + + # 5. 
Operation Duration by Cargo Weight (for delivery operations) + delivery_ops = df[df['activityType'] == 'Delivery of Cargo'].copy() + if 'cargoWeightDO' in df.columns and not delivery_ops.empty: + plt.figure(figsize=(14, 8)) + + # Remove NaN values for this analysis + delivery_with_weight = delivery_ops.dropna(subset=['cargoWeightDO']) + + if not delivery_with_weight.empty: + plt.subplot(1, 2, 1) + sns.scatterplot(data=delivery_with_weight, x='cargoWeightDO', y='operationDurationInMin', alpha=0.5) + plt.title('Operation Duration vs Cargo Weight (Delivery)') + plt.xlabel('Cargo Weight (Delivery)') + plt.ylabel('Duration (minutes)') + + plt.subplot(1, 2, 2) + # Create weight bins + delivery_with_weight['weight_bin'] = pd.cut(delivery_with_weight['cargoWeightDO'], bins=10) + sns.boxplot(data=delivery_with_weight, x='weight_bin', y='operationDurationInMin') + plt.title('Operation Duration by Cargo Weight Range (Delivery)') + plt.xlabel('Cargo Weight Range') + plt.ylabel('Duration (minutes)') + plt.xticks(rotation=90) + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_weight_delivery.png") + plt.close() + print("Created duration by cargo weight plot for delivery operations") + + # 6. Operation Duration by Vehicle Fuel Type + plt.figure(figsize=(12, 6)) + + fuel_counts = df['vehicleFuelType'].value_counts() + + plt.subplot(1, 2, 1) + sns.boxplot(data=df, x='vehicleFuelType', y='operationDurationInMin') + plt.title('Operation Duration by Fuel Type') + plt.xlabel('Fuel Type') + plt.ylabel('Duration (minutes)') + + plt.subplot(1, 2, 2) + sns.barplot(x=fuel_counts.index, y=fuel_counts.values) + plt.title('Count of Vehicles by Fuel Type') + plt.xlabel('Fuel Type') + plt.ylabel('Count') + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_fuel_type.png") + plt.close() + print("Created duration by fuel type plot") + + # 7. 
Operation Duration by Vehicle Class + plt.figure(figsize=(12, 6)) + + class_counts = df['vehicleClass'].value_counts() + + plt.subplot(1, 2, 1) + sns.boxplot(data=df, x='vehicleClass', y='operationDurationInMin') + plt.title('Operation Duration by Vehicle Class') + plt.xlabel('Vehicle Class') + plt.ylabel('Duration (minutes)') + plt.xticks(rotation=45) + + plt.subplot(1, 2, 2) + sns.barplot(x=class_counts.index, y=class_counts.values) + plt.title('Count of Vehicles by Class') + plt.xlabel('Vehicle Class') + plt.ylabel('Count') + plt.xticks(rotation=45) + + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_by_vehicle_class.png") + plt.close() + print("Created duration by vehicle class plot") + + # 8. Correlation heatmap for numerical variables + plt.figure(figsize=(10, 8)) + + # Select only numeric columns + numeric_df = df.select_dtypes(include=[np.number]) + + # 9. Vehicle ID analysis - operations per vehicle + vehicle_ops = df.groupby('vehicleId').size().reset_index(name='operation_count') + vehicle_ops = vehicle_ops.sort_values('operation_count', ascending=False) + + plt.figure(figsize=(12, 6)) + plt.bar(range(len(vehicle_ops[:20])), vehicle_ops['operation_count'][:20]) + plt.xticks(range(len(vehicle_ops[:20])), vehicle_ops['vehicleId'][:20], rotation=45) + plt.title('Number of Operations by Vehicle ID (Top 20)') + plt.xlabel('Vehicle ID') + plt.ylabel('Number of Operations') + plt.tight_layout() + plt.savefig(f"{output_dir}/operations_by_vehicle.png") + plt.close() + print("Created operations by vehicle plot") + + # Statistical analysis + print("\n===== STATISTICAL ANALYSIS =====") + + # 1. Summary by vehicle class + class_summary = df.groupby('vehicleClass')['operationDurationInMin'].agg( + ['count', 'mean', 'median', 'std', 'min', 'max']) + print("\nOperation Duration Summary by Vehicle Class:") + print(class_summary) + + # 2. 
Summary by fuel type + fuel_summary = df.groupby('vehicleFuelType')['operationDurationInMin'].agg( + ['count', 'mean', 'median', 'std', 'min', 'max']) + print("\nOperation Duration Summary by Fuel Type:") + print(fuel_summary) + + # 3. Summary by activity type + if 'activityType' in df.columns: + activity_summary = df.groupby('activityType')['operationDurationInMin'].agg( + ['count', 'mean', 'median', 'std', 'min', 'max']) + print("\nOperation Duration Summary by Activity Type:") + print(activity_summary) + + # 4. Summary by vehicle model year + year_summary = df.groupby('vehicleModelYear')['operationDurationInMin'].agg( + ['count', 'mean', 'median', 'std', 'min', 'max']) + print("\nOperation Duration Summary by Vehicle Model Year:") + print(year_summary) + + # 5. Correlation analysis + print("\nCorrelation with Operation Duration:") + for col in numeric_df.columns: + if col != 'operationDurationInMin': + correlation = df['operationDurationInMin'].corr(df[col]) + print(f"{col}: {correlation:.4f}") + + # 6. Top 10 longest operations + print("\nTop 10 Longest Operations:") + print(df.nlargest(10, 'operationDurationInMin')[ + ['vehicleId', 'vehicleModelYear', 'vehicleFuelType', 'vehicleClass', 'operationDurationInMin', + 'activityType']]) + + # 7. Top 10 shortest operations (excluding zeros) + print("\nTop 10 Shortest Operations (excluding zeros):") + print(df[df['operationDurationInMin'] > 0].nsmallest(10, 'operationDurationInMin')[ + ['vehicleId', 'vehicleModelYear', 'vehicleFuelType', 'vehicleClass', 'operationDurationInMin', + 'activityType']]) + + # 8. Zero duration operations + zero_durations = df[df['operationDurationInMin'] == 0] + print( + f"\nNumber of zero-duration operations: {zero_durations.shape[0]}") + + # 9. 
Duration buckets analysis + duration_buckets = [ + (0, 0), + (0, 15), + (15, 30), + (30, 60), + (60, 120), + (120, 240), + (240, 480), + (480, 1000), + (1000, float('inf')) + ] + + bucket_labels = [ + 'Zero', + '0-15 min', + '15-30 min', + '30-60 min', + '1-2 hours', + '2-4 hours', + '4-8 hours', + '8-16 hours', + '16+ hours' + ] + + bucket_counts = [] + for i, (lower, upper) in enumerate(duration_buckets): + if i == 0: # Zero duration case + count = (df['operationDurationInMin'] == 0).sum() + else: + count = ((df['operationDurationInMin'] > lower) & (df['operationDurationInMin'] <= upper)).sum() + bucket_counts.append(count) + + plt.figure(figsize=(12, 6)) + plt.bar(bucket_labels, bucket_counts) + plt.title('Operation Counts by Duration Buckets') + plt.xlabel('Duration Bucket') + plt.ylabel('Count') + plt.xticks(rotation=45) + plt.tight_layout() + plt.savefig(f"{output_dir}/duration_buckets.png") + plt.close() + print("Created duration buckets analysis") + + # Save summary to CSV + summary_df = pd.DataFrame({ + 'Metric': ['Total Operations', 'Average Duration (min)', 'Median Duration (min)', + 'Min Duration (min)', 'Max Duration (min)', 'Std Dev Duration (min)', + 'Zero Duration Operations', 'Operations > 60 min', 'Operations > 120 min', + 'Operations > 480 min', 'Operations > 1000 min'], + 'Value': [df.shape[0], + df['operationDurationInMin'].mean(), + df['operationDurationInMin'].median(), + df['operationDurationInMin'].min(), + df['operationDurationInMin'].max(), + df['operationDurationInMin'].std(), + zero_durations.shape[0], + df[df['operationDurationInMin'] > 60].shape[0], + df[df['operationDurationInMin'] > 120].shape[0], + df[df['operationDurationInMin'] > 480].shape[0], + df[df['operationDurationInMin'] > 1000].shape[0]] + }) + + summary_df.to_csv(f"{output_dir}/summary_statistics.csv", index=False) + class_summary.to_csv(f"{output_dir}/class_summary.csv") + fuel_summary.to_csv(f"{output_dir}/fuel_summary.csv") + 
year_summary.to_csv(f"{output_dir}/year_summary.csv") + + if 'activityType' in df.columns: + activity_summary.to_csv(f"{output_dir}/activity_summary.csv") + + # Create a comprehensive report + with open(f"{output_dir}/analysis_report.txt", "w") as f: + f.write("=== CARGO OPERATIONS ANALYSIS REPORT ===\n\n") + f.write(f"Total Records: {df.shape[0]}\n\n") + + f.write("=== SUMMARY STATISTICS ===\n") + for i, row in summary_df.iterrows(): + f.write(f"{row['Metric']}: {row['Value']}\n") + + f.write("\n=== VEHICLE INFORMATION ===\n") + f.write(f"Total unique vehicles: {df['vehicleId'].nunique()}\n") + f.write(f"Vehicle model years range: {df['vehicleModelYear'].min()} to {df['vehicleModelYear'].max()}\n") + f.write(f"Vehicle fuel types: {', '.join(df['vehicleFuelType'].unique())}\n") + f.write(f"Vehicle classes: {', '.join(df['vehicleClass'].unique())}\n") + + + f.write("\n=== CORRELATION ANALYSIS ===\n") + f.write("Correlation with Operation Duration:\n") + for col in numeric_df.columns: + if col != 'operationDurationInMin': + correlation = df['operationDurationInMin'].corr(df[col]) + f.write(f"{col}: {correlation:.4f}\n") + + f.write("\n=== NOTABLE OBSERVATIONS ===\n") + # Add any notable observations here based on the analysis + if df['operationDurationInMin'].max() > 1000: + f.write("- Some operations have extremely long durations (over 16 hours)\n") + + if zero_durations.shape[0] > 0: + f.write(f"- {zero_durations.shape[0]} operations have zero duration\n") + + # Add vehicle class specific observations + for vehicle_class in df['vehicleClass'].unique(): + class_data = df[df['vehicleClass'] == vehicle_class] + avg_duration = class_data['operationDurationInMin'].mean() + f.write(f"- {vehicle_class} vehicles have an average operation duration of {avg_duration:.2f} minutes\n") + + f.write("\n=== CONCLUSION ===\n") + f.write( + "This analysis provides insights into the cargo operations data, highlighting patterns in operation durations across different vehicle types, 
model years, and activity types.\n") + + print(f"\nAnalysis complete. Results saved to {output_dir}/") + print(f"A comprehensive report has been generated at {output_dir}/analysis_report.txt") + + return df + + +def main(): + payloads = pd.read_csv("outputs/payloads_test.csv") + payloads['operationDurationInMin'] = payloads['operationDurationInSec'] / 60 + payloads['operationDurationInMinOG'] = payloads['operationDurationInSecOG'] / 60 + payloads['weightInLbs'] = payloads['weightInKg'] * 2.20462 + + generate_duration_histogram( + payloads["operationDurationInMin"], + label="Model Durations", + output_file='outputs/model_duration_histogram.png', + log_scale=False + ) + + generate_duration_histogram( + payloads["operationDurationInMinOG"], + label="FRISM Durations", + output_file='outputs/frism_duration_histogram.png', + log_scale=False + ) + + plot_duration_comparison( + payloads, + duration_col1='operationDurationInMin', + duration_col2='operationDurationInMinOG', + group_col='requestType', + output_file='outputs/mode_duration_vs_frism_duration_by_pudo.png' + ) + + plot_duration_vs_weight( + payloads[payloads["requestType"] == "loading"], + duration_col='operationDurationInMin', + weight_col='weightInLbs', + group_col=None, + output_file='outputs/duration_vs_weight_pu.png' + ) + + plot_duration_vs_weight( + payloads[payloads["requestType"] == "unloading"], + duration_col='operationDurationInMin', + weight_col='weightInLbs', + group_col=None, + output_file='outputs/duration_vs_weight_do.png' + ) + + +def main2(): + """ + Main function to run the analysis + """ + print("Starting Commercial Vehicle Operation Duration Analysis") + + # File paths + austin_dir = os.path.expanduser("~/Workspace/Simulation/sfbay/data/Austin_2017") + frism_dir = os.path.expanduser("~/Workspace/Simulation/sfbay/frism/2024-01-23/Baseline") + output_dir = os.path.expanduser("~/Workspace/Simulation/sfbay/data/Austin_2017/output") + + # Ensure output directory exists + os.makedirs(output_dir, 
exist_ok=True) + + # Process Austin CV Survey data and generate CSV file + try: + print("\nProcessing Austin Commercial Vehicle Survey data...") + trip_data_path = f"{austin_dir}/Raw 2017-2018 Austin Commercial Vehicle Travel Survey Data for UT and ANL.xlsx" + + # Load, process, and generate CSV file + austin_data = load_and_process_austin_data(trip_data_path, output_dir) + austin_data["cargoWeightPUdiffDO"] = austin_data["cargoWeightPU"] - austin_data["cargoWeightDO"] + + analyze_cargo_operations(austin_data, output_dir) + + plot_duration_vs_weight( + austin_data[austin_data["activityType"]=="Pick up Cargo"], + duration_col='operationDurationInMin', + weight_col='cargoWeightPU', + group_col=None, + output_file=os.path.join(output_dir, 'austin_duration_vs_weight_pu.png') + ) + + plot_duration_vs_weight( + austin_data[austin_data["activityType"]=="Delivery of Cargo"], + duration_col='operationDurationInMin', + weight_col='cargoWeightDO', + group_col=None, + output_file=os.path.join(output_dir, 'austin_duration_vs_weight_do.png') + ) + + plot_duration_vs_weight( + austin_data[austin_data["activityType"]=="Deliver and Pick up Cargo"], + duration_col='operationDurationInMin', + weight_col='cargoWeightPUdiffDO', + group_col=None, + output_file=os.path.join(output_dir, 'austin_duration_vs_weight_pu_diff_do.png') + ) + + except Exception as e: + print(f"Error processing Austin data: {e}") + import traceback + traceback.print_exc() + + # Process FRISM plan data + try: + print("\nProcessing FRISM plan data...") + frism_b2b = pd.read_csv(f"{frism_dir}/B2B_all_payload_sBase_y2018.csv") + frism_b2c = pd.read_csv(f"{frism_dir}/B2C_all_payload_sBase_y2018.csv") + frism_data = pd.concat([frism_b2b, frism_b2c], ignore_index=True) + + # Ensure weight column is processed + if 'weightInlb' in frism_data.columns: + frism_data["weightInlbAbs"] = frism_data["weightInlb"].abs() + + # Generate histogram + generate_duration_histogram( + frism_data["operationDurationInSec"], + ) + + # 
Analyze by categorical variables if available + if 'operationType' in frism_data.columns: + analyze_durations_by_key( + frism_data, + 'operationType', + 'operationDurationInSec', + output_dir=output_dir, + prefix='frism' + ) + + print("FRISM data processing complete.") + except Exception as e: + print(f"Error processing FRISM data: {e}") + + print("\nAnalysis complete!") + print(f"Results saved to {output_dir} directory") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/main/python/freight/utils.py b/src/main/python/freight/utils.py new file mode 100644 index 00000000000..49c66731524 --- /dev/null +++ b/src/main/python/freight/utils.py @@ -0,0 +1,44 @@ + + +def read_csv(file_path): + import pandas as pd + compression = None + if file_path.endswith(".gz"): + compression = 'gzip' + return pd.read_csv(file_path, sep=",", index_col=None, header=0, compression=compression) + + +def read_csv_in_chunks(file_path): + import pandas as pd + compression = None + if file_path.endswith(".gz"): + compression = 'gzip' + # Read the large csv file in chunks + chunk_size = 5_000_000 # This will depend on your available memory + chunks = [] + for chunk in pd.read_csv(file_path, chunksize=chunk_size, sep=",", index_col=None, header=0, compression=compression): + # Process each chunk here if necessary, for example: + chunks.append(chunk) + # Concatenate all chunks into one DataFrame + return pd.concat(chunks, ignore_index=True) + + +def read_csv_in_parallel(file_path): + import dask.dataframe as dd + compression = None + if file_path.endswith(".gz"): + compression = 'gzip' + return dd.read_csv(file_path, compression=compression, blocksize=None) + + +def print2(file_path, msg): + with open(file_path + ".out", 'w') as f: + print(msg) + print(msg, file=f) + + +def construct_events_file_path(workspace, city, scenario, batch, iteration=0): + import os + base_dir = workspace+city+'/beam/runs/'+scenario+'/'+batch+"/" + filename = 
str(iteration)+'.events.csv.gz' + return os.path.expanduser(base_dir + filename) \ No newline at end of file diff --git a/src/main/python/network_validation/DownloadSFBay-2-OSMNX_3.ipynb b/src/main/python/network_validation/DownloadSFBay-2-OSMNX_3.ipynb new file mode 100644 index 00000000000..6b8fe828c1f --- /dev/null +++ b/src/main/python/network_validation/DownloadSFBay-2-OSMNX_3.ipynb @@ -0,0 +1,1141 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "22eeb57e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.0.1\n" + ] + } + ], + "source": [ + "import osmnx as ox\n", + "import networkx as nx\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import networkx as nx\n", + "import osmnx as ox\n", + "import matplotlib.colors as mcolors\n", + "import pickle\n", + "import contextily as ctx\n", + "import subprocess\n", + "import xml.etree.ElementTree as ET\n", + "import numpy as np\n", + "\n", + "print(ox.__version__)\n", + "\n", + "#in the settings specify a single date" + ] + }, + { + "cell_type": "markdown", + "id": "2198fe24", + "metadata": {}, + "source": [ + "# Define Scenario Configs" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "697e0f9d", + "metadata": {}, + "outputs": [], + "source": [ + "studyArea = 'sfbay'\n", + "\n", + "simpl_intersections = 2\n", + "splitLinksBy = [\"highway\",\"lanes\", \"maxspeed\", \"hgv\", \"mgv\"]\n", + "#Define custom filters\n", + "cf1 = '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'\n", + "cf2 = '[\"highway\"~\"residential\"]'\n", + "cf3 = '[\"highway\"~\"motorway|primary|residential|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'\n", + "\n", + "desired_tags = [\"maxweight\", \"hgv\", \"maxweight:hgv\", \"maxlength\", \"maxaxleload\"]\n", + 
"ox.settings.useful_tags_way = list(ox.settings.useful_tags_way) + desired_tags\n", + "\n", + "# Filter access = private ... look at edge and node attributes for more\n", + "\n", + "if studyArea == 'sfbay':\n", + " is_addresses = False\n", + " is_bboxes = False\n", + " is_places = True\n", + " is_points = False\n", + " is_polygons = False\n", + " is_xmls = False\n", + " ######## PLACE ##############################\n", + "\n", + " # Input places (list of place names)\n", + " places = [\n", + " {\"county\": \"San Francisco\", \"state\": \"California\"},\n", + " {\"county\": \"San Francisco\", \"state\": \"California\"},\n", + " {\"county\": \"Alameda\", \"state\": \"California\"},\n", + " {\"county\": \"Contra Costa\", \"state\": \"California\"},\n", + " {\"county\": \"Marin\", \"state\": \"California\"},\n", + " {\"county\": \"Napa\", \"state\": \"California\"},\n", + " {\"county\": \"San Mateo\", \"state\": \"California\"},\n", + " {\"county\": \"Santa Clara\", \"state\": \"California\"},\n", + " {\"county\": \"Solano\", \"state\": \"California\"},\n", + " {\"county\": \"Sonoma\", \"state\": \"California\"},\n", + " ]\n", + "\n", + "\n", + " places_filters = {\n", + " \"network_type\": \"drive\", \n", + " \"simplify\": False, \n", + " \"retain_all\": True, \n", + " \"truncate_by_edge\": True,\n", + " \"which_result\": None, \n", + " \"custom_filter\": [\n", + " cf3,cf1,cf1,cf1,cf1,cf1,cf1,cf1,cf1\n", + " ]}\n", + " \n", + "\n", + "else:\n", + "\n", + " is_addresses = False\n", + " is_bboxes = False\n", + " is_places = True\n", + " is_points = False\n", + " is_polygons = False\n", + " is_xmls = False\n", + "\n", + " studyArea = 'NewYork'\n", + "\n", + "\n", + " # 0.00008983 = 10m\n", + " simpl_intersections = 2\n", + " splitLinksBy = [\"highway\",\"lanes\", \"maxspeed\"]\n", + "\n", + " #Define custom filters\n", + " cf1 = 
'[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'\n", + " cf3 = '[\"highway\"~\"residential|motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'\n", + " cf2 = '[\"highway\"~\"residential\"]'\n", + " cf_main_highways = '[\"highway\"=\"motorway\"]'\n", + "\n", + " ############################## ADDRESS ##############################\n", + " addresses = [\"San Francisco, CA, USA\", \"San Francisco, CA, USA\"]\n", + " # addresses = [\"Milano, IT\", \"Milano, IT\"]\n", + " addresses_filters = {\n", + " \"dist\": 5000, \n", + " \"dist_type\": 'bbox', # \"network\", \"bbox\" if “bbox”, retain only those nodes within a \n", + " # bounding box of the distance parameter. if “network”, retain \n", + " # only those nodes within some network distance from the center-most node.\n", + " \"network_type\": \"drive\", #all\", \"all_public\", \"bike\", \"drive\", \"drive_service\", \"walk\"\n", + " \"simplify\": False, \n", + " \"retain_all\": True, # if True, return the entire graph even if it is not connected. 
otherwise, \n", + " # retain only the largest weakly connected component.\n", + " \"truncate_by_edge\": None, # if True, retain nodes outside bounding box if at least one \n", + " # of node’s neighbors is within the bounding box\n", + " \"custom_filter\": [cf2, cf1] #'[\"highway\"~\"motorway|trunk\"]' ‘[“power”~”line”]’ or ‘[“highway”~”motorway|trunk”]’.\n", + " }\n", + "\n", + " ############################## BBOX ##############################\n", + "\n", + " #Input bounding boxes (list of tuples representing bounding box coordinates)\n", + " bboxes = [(37.8, 37.7, -122.5, -122.4), (37.9, 37.8, -122.3, -122.2)]\n", + " bboxes_filters = {\n", + " \"network_typeTrue\": [\"drive\", \"bike\"], \n", + " \"simplify\": [False, False], \n", + " \"retain_all\": True, \n", + " \"truncate_by_edge\": [True, True], \n", + " \"custom_filter\": [cf2, cf1] \n", + " }\n", + "\n", + " ############################## PLACE ##############################\n", + "\n", + " # Input places (list of place names)\n", + " places = [\n", + " {\"county\": \"San Francisco\", \"state\": \"California\"},\n", + " {\"county\": \"San Francisco\", \"state\": \"California\"},\n", + " {\"county\": \"Alameda\", \"state\": \"California\"},\n", + " {\"county\": \"Contra Costa\", \"state\": \"California\"},\n", + " {\"county\": \"Marin\", \"state\": \"California\"},\n", + " {\"county\": \"Napa\", \"state\": \"California\"},\n", + " {\"county\": \"San Mateo\", \"state\": \"California\"},\n", + " {\"county\": \"Santa Clara\", \"state\": \"California\"},\n", + " {\"county\": \"Solano\", \"state\": \"California\"},\n", + " {\"county\": \"Sonoma\", \"state\": \"California\"},]\n", + "\n", + "\n", + " places_filters = {\n", + " \"network_type\": \"drive\",\n", + " \"simplify\": False, \n", + " \"retain_all\": True, \n", + " \"truncate_by_edge\": False, \n", + " \"which_result\": None, \n", + " \"custom_filter\": [\n", + " cf3,cf1,cf1,cf1,cf1,cf1,cf1,cf1,cf1,cf1\n", + " ]}\n", + "\n", + " 
############################## POINT ##############################\n", + "\n", + " # Input points (list of tuples, each containing (latitude, longitude))\n", + " points = [(37.556036, -122.268709)]\n", + "\n", + " # San Francisco and Oakland\n", + " points_filters = {\n", + " \"dist\": [1000], # Retain only those nodes within this many meters of the center of the graph\n", + " \"dist_type\": 'bbox', \n", + " \"network_type\": [\"drive\", \"bike\"], \n", + " \"simplify\": [False, False], \n", + " \"retain_all\": True, \n", + " \"truncate_by_edge\": [False, True], \n", + " \"custom_filter\": cf1\n", + " }\n", + "\n", + " # Input polygons (using geocode to get polygon boundaries)\n", + " # (shapely.geometry.Polygon or shapely.geometry.MultiPolygon) – the shape to get network data within. \n", + " # coordinates should be in unprojected latitude-longitude degrees (EPSG:4326).\n", + "\n", + " ############################## POLYGON ##############################\n", + "\n", + "\n", + " # polygons = [ox.geocode_to_gdf(\"Downtown San Francisco\"), ox.geocode_to_gdf(\"Oakland\")]\n", + " polygons_filters = {\n", + " \"network_type\": [\"drive\", \"bike\"], \n", + " \"simplify\": [False, False], \n", + " \"retain_all\": True, \n", + " \"truncate_by_edge\": [True, True], \n", + " \"custom_filter\": '[\"building\"~\"yes\"]' \n", + " }\n", + "\n", + " # Input XML files (paths to files that contain OSM data in XML format)\n", + " xmls = [\"/path/to/sf.osm\", \"/path/to/berkeley.osm\"]\n", + " xmls_filters = {\n", + " \"bidirectional\": False, ####\n", + " \"simplify\": [False, False], \n", + " \"retain_all\": True, \n", + " \"encoding\": \"utf-8\", #### \n", + " \"custom_filter\": '[\"highway\"~\"residential\"]' \n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "522e01b1", + "metadata": {}, + "source": [ + "# INPUTS" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3b5761ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the graph 
in 6 different ways\n", + "# Turn on the methods to be used, and insert the inputs\n", + "# Use lists to combine multiple graphs, and single values for common filter parameters\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "6002d0ff", + "metadata": {}, + "source": [ + "# FUNCTIONS" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "938734be", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# Helper function to get the appropriate value from the filter\n", + "def get_filter_value(filter_param, index, total_count):\n", + " if isinstance(filter_param, list):\n", + " # If the parameter is a list, return the value for the current index\n", + " return filter_param[index % len(filter_param)]\n", + " else:\n", + " # If the parameter is a single value, return the same value for all\n", + " return filter_param\n", + "\n", + "# Helper function to apply filters dynamically\n", + "def apply_filters(filters, index, total_count):\n", + " return {key: get_filter_value(value, index, total_count) for key, value in filters.items()}\n", + "\n", + "# Generic function to generate graphs based on a method and a list of inputs\n", + "def generate_graphs(inputs, filters, graph_function):\n", + " graphs = []\n", + " for i, input_data in enumerate(inputs):\n", + " # Dynamically apply filters based on index\n", + " print(input_data)\n", + " dynamic_filters = apply_filters(filters, i, len(inputs))\n", + " print(dynamic_filters)\n", + " graph = graph_function(input_data, **dynamic_filters)\n", + "# plot(graph, f'{input_data}_{str(simpl_intersections)}_original_graph')\n", + " graphs.append(graph)\n", + " return nx.compose_all(graphs) if graphs else None\n", + "\n", + "# Specific functions using the generate_graphs utility\n", + "\n", + "def get_graph_from_address(addresses, filters):\n", + " return generate_graphs(addresses, filters, ox.graph_from_address)\n", + "\n", + "def get_graph_from_bbox(bboxes, filters):\n", + " return 
def get_graph_from_place(places, filters):
    """Build graphs for place queries by passing ``ox.graph_from_place`` to ``generate_graphs``."""
    return generate_graphs(places, filters, ox.graph_from_place)


def get_graph_from_point(points, filters):
    """Build graphs for centre points by passing ``ox.graph_from_point`` to ``generate_graphs``."""
    return generate_graphs(points, filters, ox.graph_from_point)


def get_graph_from_polygon(polygons, filters):
    """Build graphs for polygons via ``ox.graph_from_polygon``.

    Only the first geometry of each entry (``polygon.geometry[0]``) is used.
    NOTE(review): entries are presumably GeoDataFrames - confirm against the caller.
    """
    return generate_graphs(
        polygons,
        filters,
        lambda polygon, **kwargs: ox.graph_from_polygon(polygon.geometry[0], **kwargs),
    )


def get_graph_from_xml(xmls, filters):
    """Build graphs from OSM XML inputs by passing ``ox.graph_from_xml`` to ``generate_graphs``."""
    return generate_graphs(xmls, filters, ox.graph_from_xml)


def combine_graphs():
    """Download every enabled source and merge the results into one graph.

    Reads the notebook-level ``is_*`` switches and their matching input and
    filter globals. Data for a source is only touched when its switch is
    truthy (the lambdas keep that lookup lazy).

    Returns
    -------
    The ``nx.compose_all`` union of all non-``None`` graphs, or ``None`` when
    no source produced a graph.
    """
    loaders = [
        (is_addresses, lambda: get_graph_from_address(addresses, addresses_filters)),
        (is_bboxes, lambda: get_graph_from_bbox(bboxes, bboxes_filters)),
        (is_places, lambda: get_graph_from_place(places, places_filters)),
        (is_points, lambda: get_graph_from_point(points, points_filters)),
        (is_polygons, lambda: get_graph_from_polygon(polygons, polygons_filters)),
        (is_xmls, lambda: get_graph_from_xml(xmls, xmls_filters)),
    ]

    combined_graphs = []
    for enabled, load in loaders:
        if not enabled:
            continue
        graph = load()
        if graph is not None:
            combined_graphs.append(graph)

    # Return the combined graph if there are any valid graphs, else return None
    return nx.compose_all(combined_graphs) if combined_graphs else None


def plot(G, name):
    """Draw ``G`` over a CartoDB Positron basemap and save it as ``{name}.png`` (600 DPI).

    The title reports node count, edge count and the summed ``length`` edge
    attribute in kilometres. The figure is left open (``close=False``).
    NOTE(review): the basemap call passes no CRS, so ``G`` is presumably
    expected to be in Web Mercator already - confirm.
    """
    fig, ax = ox.plot.plot_graph(
        G,
        bgcolor="#FFFFFF",       # light background
        node_color="#333333",    # dark grey nodes
        node_size=0.02,
        node_edgecolor='none',   # no outline around nodes
        node_zorder=3,           # nodes above edges
        edge_color="#FF5A5F",    # bright coral edges
        edge_linewidth=0.2,
        edge_alpha=0.8,
        show=False,              # do not display immediately
        close=False,             # keep the plot open for saving
    )

    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

    # Statistics shown in the title; edges without a 'length' count as 0.
    num_nodes = len(G.nodes)
    num_edges = len(G.edges)
    total_length = sum(data.get('length', 0) for _, _, data in G.edges(data=True))

    title = (
        f"Nodes: {num_nodes} | Edges: {num_edges} | Total Length: {total_length/1000:.2f} km"
    )
    ax.set_title(title, fontsize=15, fontweight='bold', color='black', pad=20)

    fig.savefig(f'{name}.png', dpi=600, bbox_inches='tight')


def plot_graph_by_attribute(G, attribute, title, figsize=(12, 12)):
    """Plot ``G`` with edges coloured by ``attribute`` and save ``{studyArea}_{attribute}.png``.

    Edges lacking the attribute are labelled ``'unknown'``. List-valued
    attributes are collapsed to a single ``"; "``-joined label so they can be
    used as categories. The attribute is treated as numeric (colour bar) only
    when every edge value is a real number; otherwise it is categorical
    (legend, randomly permuted ``tab20`` colours).

    Raises
    ------
    ValueError
        If the graph has no edges.
    """
    import numbers

    def _as_label(value):
        # Merged edges can carry several values; lists are unhashable, so flatten them.
        if isinstance(value, (list, tuple, set)):
            return "; ".join(str(item) for item in value)
        return value

    attribute_values = [_as_label(G.edges[edge].get(attribute, 'unknown')) for edge in G.edges]
    if not attribute_values:
        raise ValueError(f"Cannot colour by '{attribute}': the graph has no edges")

    # Look at every value, not just the first one, so mixed columns
    # (numbers plus the 'unknown' placeholder) fall back to categorical.
    is_numeric = all(
        isinstance(val, numbers.Real) and not isinstance(val, bool)
        for val in attribute_values
    )

    if is_numeric:
        norm = mcolors.Normalize(vmin=min(attribute_values), vmax=max(attribute_values))
        color_map = plt.cm.ScalarMappable(norm=norm, cmap='plasma')
        edge_colors = [color_map.to_rgba(val) for val in attribute_values]
    else:
        unique_values = list(set(attribute_values))
        # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib removed.
        colors = plt.get_cmap('tab20', len(unique_values))(range(len(unique_values)))
        colors = colors[np.random.permutation(len(unique_values))]
        color_map = dict(zip(unique_values, colors))
        edge_colors = [color_map[val] for val in attribute_values]

    # Plot the graph with edges colored by the specified attribute
    fig, ax = plt.subplots(figsize=figsize)
    ox.plot_graph(
        G,
        ax=ax,
        bgcolor="#222222",
        node_color="#00FFAA",
        node_size=0.2,
        node_alpha=0.9,
        node_edgecolor="#333333",
        edge_color=edge_colors,
        edge_linewidth=0.7,
        edge_alpha=1,
        show=False,
        close=False
    )
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

    ax.set_title(title, color="white")

    if is_numeric:
        cbar = plt.colorbar(color_map, ax=ax)
        cbar.set_label(attribute)
    else:
        handles = [plt.Line2D([0], [0], color=color_map[val], lw=4) for val in unique_values]
        ax.legend(handles, unique_values, title=attribute, loc="lower right", frameon=False, fontsize=10)

    fig.savefig(f'{studyArea}_{attribute}.png', dpi=600, bbox_inches='tight')
    plt.show()


def analyze_specific_edge_attributes(df):
    """Display summary statistics and value counts for selected edge columns.

    Columns that are absent from ``df`` are reported and skipped instead of
    raising ``KeyError``.
    """
    numeric_columns = [col for col in ['length', 'speed_kph'] if col in df.columns]
    if numeric_columns:
        numeric_summary = df[numeric_columns].describe().T.round(2)
        print("\nDescriptive statistics for numeric attributes in edges:")
        display(numeric_summary)

    categorical_attributes = ['oneway', 'maxspeed', 'lanes', 'sidewalk', 'cycleway',
                              'access', 'maxweight', 'hgv', 'highway']
    for attr in categorical_attributes:
        if attr not in df.columns:
            print(f"\n'{attr}' not present in edges; skipping.")
            continue
        print(f"\nValue counts for '{attr}' in edges:")
        value_counts_df = df[attr].value_counts(dropna=False).to_frame(name="Count")
        display(value_counts_df)


def analyze_specific_node_attributes(df):
    """Display value counts for selected node columns, skipping any that are absent."""
    node_categorical_attributes = ['street_count', 'traffic_signals']
    for attr in node_categorical_attributes:
        if attr not in df.columns:
            print(f"\n'{attr}' not present in nodes; skipping.")
            continue
        print(f"\nValue counts for '{attr}' in nodes:")
        value_counts_df = df[attr].value_counts(dropna=False).to_frame(name="Count")
        display(value_counts_df)


def save_graph_to_osm(G, filename="output.osm"):
    """Write ``G`` as an OSM XML file with one two-node ``way`` per edge.

    Nodes are renumbered 1..N in iteration order; ways get negative ids
    (-1, -2, ...). Every non-``None`` node/edge attribute is written as a tag
    (node ``x``/``y`` become ``lon``/``lat``). An edge without its own
    ``highway`` attribute receives the placeholder ``highway=road``; an edge
    that has one keeps only its own value, so no way carries two ``highway``
    tags.

    Raises
    ------
    ValueError
        If no node has both ``x`` and ``y``.
    """
    # Only nodes that have both coordinates can be written or bounded.
    located = [
        (n, d) for n, d in G.nodes(data=True)
        if d.get('x') is not None and d.get('y') is not None
    ]
    if not located:
        raise ValueError("Graph has no nodes with 'x'/'y' coordinates; nothing to write")

    xs = [d['x'] for _, d in located]
    ys = [d['y'] for _, d in located]

    root = ET.Element("osm", version="0.6", generator="OSMnx2OSM")
    ET.SubElement(root, "bounds",
                  minlat=str(min(ys)), minlon=str(min(xs)),
                  maxlat=str(max(ys)), maxlon=str(max(xs)))

    # Write nodes + attributes as tags
    node_map = {}
    for node_id, (n, d) in enumerate(located, start=1):
        node = ET.SubElement(
            root, "node",
            id=str(node_id), lat=str(d['y']), lon=str(d['x']),
            version="1", changeset="1", user="osmnx", uid="1",
            timestamp="2020-01-01T00:00:00Z"
        )
        node_map[n] = node_id
        for k, v in d.items():
            if k not in ("x", "y") and v is not None:
                ET.SubElement(node, "tag", k=str(k), v=str(v))

    # Write ways (edges) + attributes as tags
    way_id = -1
    for u, v, edata in G.edges(data=True):
        if u not in node_map or v not in node_map:
            continue
        way = ET.SubElement(root, "way",
                            id=str(way_id), version="1", changeset="1",
                            user="osmnx", uid="1", timestamp="2020-01-01T00:00:00Z")
        ET.SubElement(way, "nd", ref=str(node_map[u]))
        ET.SubElement(way, "nd", ref=str(node_map[v]))
        # Guarantee at least one standard OSM tag without duplicating the key.
        if edata.get("highway") is None:
            ET.SubElement(way, "tag", k="highway", v="road")
        for k, v_ in edata.items():
            if v_ is not None:
                ET.SubElement(way, "tag", k=str(k), v=str(v_))
        way_id -= 1

    ET.ElementTree(root).write(filename, encoding="utf-8", xml_declaration=True)


# ---------------------------------------------------------------------------
# FUNCTIONS FOR FREIGHT ATTRIBUTES
# ---------------------------------------------------------------------------

# Example thresholds (adjust as needed)
MDV_LIMIT_LBS = 26000
HDV_LIMIT_LBS = 80000
# Length thresholds (feet) separating the LD / MD / HD classes.
MDV_LIMIT_FT = 40
HDV_LIMIT_FT = 53

# Unit conversion factors to pounds.
_LBS_PER_SHORT_TON = 2000
_LBS_PER_METRIC_TON = 2204.6226
_LBS_PER_KG = 2.2046226

# "<number><optional spaces><optional unit letters>" at the start of the value.
_WEIGHT_RE = re.compile(r"(\d+(?:\.\d+)?)\s*([a-z]*)")
# 35'   35'6   35'6"   35' 6"
_FEET_INCHES_RE = re.compile(r"(\d+(?:\.\d+)?)\s*'\s*(?:(\d+(?:\.\d+)?)\s*\"?)?")
# 10"
_INCHES_ONLY_RE = re.compile(r"(\d+(?:\.\d+)?)\s*\"")
_LEADING_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")


def parse_weight_to_lbs(w):
    """Convert an OSM-style weight string to pounds.

    The unit is read from the letters that directly follow the leading number,
    with or without a separating space: ``lb``/``lbs``/``pounds`` are pounds,
    ``kg`` is kilograms, ``st``/``short...`` are short tons, anything starting
    with ``t`` (``t``, ``tons``) or ``mt``/``metric`` is metric tons. A bare
    number or an unrecognised unit is treated as short tons, which is this
    notebook's working assumption.

    Returns ``None`` for non-string input or when the value does not start
    with a number.
    """
    if not isinstance(w, str):
        return None
    m = _WEIGHT_RE.match(w.strip().lower())
    if not m:
        return None
    val = float(m.group(1))
    unit = m.group(2)

    if unit.startswith(('lb', 'pound')):
        return val
    if unit == 'kg':
        return val * _LBS_PER_KG
    if unit.startswith(('st', 'short')):
        return val * _LBS_PER_SHORT_TON
    if unit.startswith(('t', 'mt', 'metric')):
        return val * _LBS_PER_METRIC_TON
    return val * _LBS_PER_SHORT_TON  # default to short tons


def parse_length_to_feet(l):
    """Convert a length string to feet.

    Understands feet-and-inches notation (``35'``, ``35'6"``), inches only
    (``10"``) and plain numbers including decimals (``12.5``), all of which
    are read as feet unless marked as inches. Anything else falls back to
    ``float()``; ``None`` is returned when that fails or the input is not a
    string.
    """
    if not isinstance(l, str):
        return None
    s = l.strip().lower()

    m = _FEET_INCHES_RE.match(s)
    if m:
        inches = float(m.group(2)) if m.group(2) else 0.0
        return float(m.group(1)) + inches / 12.0

    m = _INCHES_ONLY_RE.match(s)
    if m:
        return float(m.group(1)) / 12.0

    m = _LEADING_NUMBER_RE.match(s)
    if m:
        return float(m.group(0))

    try:
        return float(s)
    except ValueError:
        return None


def parse_hgv(h):
    """Map a boolean ``hgv`` value to ``True``/``False``.

    A list containing ``True`` also yields ``True``. Every other input,
    including plain strings, yields ``None``.
    """
    if h is True:
        return True
    if h is False:
        return False
    if isinstance(h, list) and True in h:
        return True
    return None


def classify_truck_allowed(row):
    """Classify a row as ``"LD"``, ``"MD"`` or ``"HD"`` from its weight and length limits.

    Reads ``maxweight_combined_lbs`` and ``maxlength_ft`` via ``row.get``.
    Each available limit is classified against its MD/HD thresholds; when both
    are available the lower (stricter) class wins. Returns ``None`` when
    neither limit is available.
    """
    w = row.get('maxweight_combined_lbs')
    ft = row.get('maxlength_ft')

    def classify_val(val, md_limit, hd_limit):
        if val >= hd_limit:
            return "HD"
        if val >= md_limit:
            return "MD"
        return "LD"

    l_class = classify_val(ft, MDV_LIMIT_FT, HDV_LIMIT_FT) if pd.notnull(ft) else None
    w_class = classify_val(w, MDV_LIMIT_LBS, HDV_LIMIT_LBS) if pd.notnull(w) else None

    if w_class and l_class:
        rank = {"LD": 0, "MD": 1, "HD": 2}
        return w_class if rank[w_class] <= rank[l_class] else l_class
    return w_class or l_class


def interpret_hgv_str(val):
    """Reduce a raw ``hgv`` tag to ``"no"`` or ``"yes"``.

    ``None`` stays ``None``. List values such as ``[yes, designated]`` are
    joined first. The result is ``"no"`` only when ``no`` appears as a whole
    token, so values that merely contain those letters (for example
    ``unknown``) count as ``"yes"`` like every other value.
    """
    if val is None:
        return None

    if isinstance(val, list):
        val = " ".join(str(x) for x in val)

    tokens = re.split(r"[^a-z0-9_]+", str(val).lower())
    return "no" if "no" in tokens else "yes"


def compute_vehicle_allowed(row):
    """Derive ``hgv``/``mgv`` flags from the original hgv tag and the truck class.

    ``hgv`` is ``"no"`` when the original tag says no or the class is LD/MD;
    ``mgv`` is ``"no"`` only for class LD.
    """
    orig_hgv = interpret_hgv_str(row['old_hgv'])
    truck_class = row['truck_allowed']
    hgv_allowed = "no" if orig_hgv == "no" or truck_class in ["LD", "MD"] else "yes"
    mgv_allowed = "no" if truck_class in ["LD"] else "yes"
    return pd.Series({"hgv": hgv_allowed, "mgv": mgv_allowed})


def compute_vehicle_allowed_from_truck(row):
    """Derive ``hgv``/``mgv`` flags from the truck class alone."""
    truck_class = row['truck_allowed']
    hgv_allowed = "no" if truck_class in ["LD", "MD"] else "yes"
    mgv_allowed = "no" if truck_class in ["LD"] else "yes"
    return pd.Series({"hgv": hgv_allowed, "mgv": mgv_allowed})


def update_freight_script(edges):
    """Add ``hgv``/``mgv`` access columns to an edge table.

    Parses ``maxweight:hgv``, ``maxweight``, ``hgv`` and ``maxlength`` when
    present, classifies each edge, rewrites ``hgv``, adds ``mgv``, prints
    value counts for the intermediate columns and finally drops those
    intermediates. The input frame is copied first, so the caller's object is
    left unchanged.

    Returns
    -------
    A new frame with the updated ``hgv`` and ``mgv`` columns.
    """
    # Work on a copy: the steps below add and overwrite columns.
    edges = edges.copy()

    print("Parsing columns if present...")
    for col in ['maxweight:hgv', 'maxweight', 'hgv', 'maxlength']:
        if col not in edges.columns:
            print(f" -> '{col}' not found; skipping.")
            continue
        print(f" -> Found '{col}'; parsing...")
        if col in ['maxweight:hgv', 'maxweight']:
            edges[col + '_lbs'] = edges[col].apply(parse_weight_to_lbs)
            edges[col + '_str'] = edges[col + '_lbs'].apply(lambda x: f"{x:,.0f} lbs" if pd.notnull(x) else None)
        elif col == 'hgv':
            edges[col + '_bool'] = edges[col].apply(parse_hgv)
        elif col == 'maxlength':
            edges[col + '_ft'] = edges[col].apply(parse_length_to_feet)

    print("Merging 'maxweight:hgv' over 'maxweight' if both exist...")
    for lbs_col in ('maxweight:hgv_lbs', 'maxweight_lbs'):
        if lbs_col not in edges.columns:
            edges[lbs_col] = None

    # Override: if 'maxweight:hgv_lbs' is not null, use it; otherwise use 'maxweight_lbs'
    edges['maxweight_combined_lbs'] = edges.apply(
        lambda x: x['maxweight:hgv_lbs'] if pd.notnull(x['maxweight:hgv_lbs']) else x['maxweight_lbs'],
        axis=1
    )
    edges['maxweight_combined_str'] = edges['maxweight_combined_lbs'].apply(
        lambda w: f"{w:,.0f} lbs" if pd.notnull(w) else None
    )

    print("Classifying truck access based on weight & length thresholds...")
    edges['truck_allowed'] = edges.apply(classify_truck_allowed, axis=1)

    if 'hgv' in edges.columns:
        # Keep the original tag so it can be combined with the truck class.
        edges['old_hgv'] = edges['hgv']
        edges[['hgv', 'mgv']] = edges.apply(compute_vehicle_allowed, axis=1)
    else:
        # No original hgv column: rely on the truck class alone.
        edges[['hgv', 'mgv']] = edges.apply(compute_vehicle_allowed_from_truck, axis=1)

    # Diagnostics, in a fixed order, for whichever columns exist.
    report_columns = [
        'old_hgv', 'maxweight:hgv', 'maxweight:hgv_lbs', 'maxweight_combined_lbs',
        'maxweight_combined_str', 'truck_allowed', 'maxweight', 'maxweight_lbs',
        'maxweight_str', 'maxlength', 'maxlength_ft', 'hgv', 'mgv',
    ]
    for col in report_columns:
        if col in edges.columns:
            print(col, edges[col].value_counts())

    edges = edges.drop(['old_hgv','maxweight:hgv','maxweight:hgv_lbs','maxweight:hgv_str','maxweight_combined_lbs','maxweight_combined_str','truck_allowed','maxweight','maxweight_lbs','maxweight_str','maxlength','maxlength_ft'], axis=1, errors='ignore')

    print("Freight script update complete!")
    return edges
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "69e92e28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'county': 'San Francisco', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|residential|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'San Francisco', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Alameda', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Contra Costa', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Marin', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Napa', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': 
True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'San Mateo', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Santa Clara', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Solano', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "{'county': 'Sonoma', 'state': 'California'}\n", + "{'network_type': 'drive', 'simplify': False, 'retain_all': True, 'truncate_by_edge': True, 'which_result': None, 'custom_filter': '[\"highway\"~\"motorway|primary|residential|trunk|secondary|tertiary|motorway_link|trunk_link|primary_link|secondary_link|tertiary_link|unclassified\"]'}\n", + "Parsing columns if present...\n", + " -> Found 'maxweight:hgv'; parsing...\n", + " -> Found 'maxweight'; parsing...\n", + " -> Found 'hgv'; parsing...\n", + " -> Found 'maxlength'; parsing...\n", + "Merging 'maxweight:hgv' over 'maxweight' if both exist...\n", + "Classifying truck access based on weight & length thresholds...\n", + "old_hgv designated 70826\n", + "no 4103\n", + "destination 792\n", + "yes 584\n", + "delivery 54\n", + "discouraged 8\n", + 
"Name: old_hgv, dtype: int64\n", + "maxweight:hgv 7 st 2652\n", + "4.5 st 2459\n", + "4 st 270\n", + "3 st 265\n", + "5 st 196\n", + "3 148\n", + "9.5 st 32\n", + "3 t 26\n", + "9 st 18\n", + "5 15\n", + "3.5 st 9\n", + "8 st 8\n", + "6.35 8\n", + "3st 4\n", + "37 st 2\n", + "Name: maxweight:hgv, dtype: int64\n", + "maxweight:hgv_lbs 14000.0000 2652\n", + "9000.0000 2459\n", + "6000.0000 417\n", + "8000.0000 270\n", + "10000.0000 211\n", + "19000.0000 32\n", + "6613.8678 26\n", + "18000.0000 18\n", + "7000.0000 9\n", + "16000.0000 8\n", + "12700.0000 8\n", + "74000.0000 2\n", + "Name: maxweight:hgv_lbs, dtype: int64\n", + "maxweight_combined_lbs 14000.0000 2836\n", + "9000.0000 2555\n", + "6000.0000 2349\n", + "10000.0000 347\n", + "8000.0000 317\n", + "11000.0000 217\n", + "19000.0000 32\n", + "6613.8678 26\n", + "18000.0000 18\n", + "7000.0000 9\n", + "16000.0000 8\n", + "12700.0000 8\n", + "74000.0000 2\n", + "30000.0000 2\n", + "Name: maxweight_combined_lbs, dtype: int64\n", + "maxweight_combined_str 14,000 lbs 2836\n", + "9,000 lbs 2555\n", + "6,000 lbs 2349\n", + "10,000 lbs 347\n", + "8,000 lbs 317\n", + "11,000 lbs 217\n", + "19,000 lbs 32\n", + "6,614 lbs 26\n", + "18,000 lbs 18\n", + "7,000 lbs 9\n", + "16,000 lbs 8\n", + "12,700 lbs 8\n", + "74,000 lbs 2\n", + "30,000 lbs 2\n", + "Name: maxweight_combined_str, dtype: int64\n", + "truck_allowed LD 9540\n", + "HD 5126\n", + "MD 4\n", + "Name: truck_allowed, dtype: int64\n", + "maxweight 3 1557\n", + "3 st 263\n", + "5.5 217\n", + "7 st 132\n", + "6000 lbs 120\n", + "4.5 96\n", + "10000 lbs 87\n", + "7 54\n", + "5 st 46\n", + "8000 lbs 39\n", + "9000 lbs 34\n", + "4 8\n", + "14000 lbs 6\n", + "3st 4\n", + "5 3\n", + "15 st 2\n", + "Name: maxweight, dtype: int64\n", + "maxweight_lbs 6000.0 1944\n", + "11000.0 217\n", + "14000.0 192\n", + "10000.0 136\n", + "9000.0 130\n", + "8000.0 47\n", + "30000.0 2\n", + "Name: maxweight_lbs, dtype: int64\n", + "maxweight_str 6,000 lbs 1944\n", + "11,000 lbs 217\n", + 
"14,000 lbs 192\n", + "10,000 lbs 136\n", + "9,000 lbs 130\n", + "8,000 lbs 47\n", + "30,000 lbs 2\n", + "Name: maxweight_str, dtype: int64\n", + "maxlength 60' 5126\n", + "35'0\" 526\n", + "35' 276\n", + "30' 16\n", + "Name: maxlength, dtype: int64\n", + "maxlength_ft 60.0 5126\n", + "35.0 802\n", + "30.0 16\n", + "Name: maxlength_ft, dtype: int64\n", + "hgv yes 1285154\n", + "no 12880\n", + "Name: hgv, dtype: int64\n", + "mgv yes 1288494\n", + "no 9540\n", + "Name: mgv, dtype: int64\n", + "Freight script update complete!\n", + "Nodes: 781847, Edges: 1298034\n", + "consolidate intersections\n", + "simplify network\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
############################## Download Networks ##############################

G = combine_graphs()
if G is None:
    # combine_graphs() returns None when no source produced a graph;
    # fail here with a clear message instead of inside ox.project_graph.
    raise ValueError("combine_graphs() returned no graph; enable at least one network source")

G = ox.project_graph(G, to_crs="epsg:3857")

plot(G, f'{studyArea}_{str(simpl_intersections)}_original_graph')

G_final = G.copy()

############################## Add Attributes

G_final = ox.add_edge_speeds(G_final)
# G_final = ox.add_edge_lanes(G_final)
# G_final = ox.add_edge_capacities(G_final)
nodes, edges = ox.graph_to_gdfs(G_final)
edges_freight = update_freight_script(edges)
G_final = ox.graph_from_gdfs(nodes, edges_freight, graph_attrs=G_final.graph)

nodes, edges = ox.graph_to_gdfs(G_final)
print(f'Nodes: {len(nodes)}, Edges: {len(edges)}')

############################## Consolidate Nodes

print('consolidate intersections')

G_final = ox.consolidate_intersections(
    G_final,
    tolerance=simpl_intersections,
    rebuild_graph=True,
    dead_ends=True,
    reconnect_edges=True,
)

# Recompute edge lengths from the (projected) geometries after consolidation.
nodes, edges = ox.graph_to_gdfs(G_final)
edges['length'] = edges['geometry'].length
G_final = ox.graph_from_gdfs(nodes, edges, graph_attrs=G_final.graph)

plot(G_final, f'{studyArea}_{str(simpl_intersections)}_consolidated_graph')

############################## Simplify Network

print('simplify network')
G_final = ox.simplification.simplify_graph(
    G_final,
    edge_attrs_differ=splitLinksBy,
    remove_rings=False,
    track_merged=True,
)

plot(G_final, f'{studyArea}_{str(simpl_intersections)}_simplified_graph')

############################## Truncate Network

G_final = ox.truncate.largest_component(G_final)
plot(G_final, f'{studyArea}_{str(simpl_intersections)}_truncated_graph')

# One map per attribute; each title names the attribute actually plotted.
plot_graph_by_attribute(G_final, attribute='highway', title="Network Colored by Highway Type")
plot_graph_by_attribute(G_final, attribute='lanes', title="Network Colored by Number of Lanes")
plot_graph_by_attribute(G_final, attribute='mgv', title="Network Colored by Medium-Duty Truck (MGV) Access")
plot_graph_by_attribute(G_final, attribute='hgv', title="Network Colored by Heavy-Duty Truck (HGV) Access")

# Define the file path where you want to save the pickle file
save_path = f'{studyArea}_{simpl_intersections}_graph.pkl'

# Save the graph using pickle
with open(save_path, 'wb') as f:
    pickle.dump(G_final, f)
print(f"Graph successfully saved to '{save_path}'.")


# --- Save Network for the Simulation ----------------------------------------

# Save GPKG
ox.save_graph_geopackage(G_final, filepath="SFBayArea.gpkg")

# Save OSM (reproject to lon/lat first; OSM XML stores geographic coordinates)
G_final = ox.project_graph(G_final, to_crs="epsg:4326")
save_graph_to_osm(G_final, filename="output.osm")

# osmium cat sfbay-unclassified-partiallysimplified-unprojected-sfres.osm -o sfbay-unclassified-partiallysimplified-unprojected-sfres.osm.pbf
"execution_count": null, + "id": "e57b0fd8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d620c70c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/main/python/network_validation/__init__.py b/src/main/python/network_validation/__init__.py new file mode 100644 index 00000000000..8efceb33052 --- /dev/null +++ b/src/main/python/network_validation/__init__.py @@ -0,0 +1 @@ +# python/network_validation/__init__.py \ No newline at end of file diff --git a/src/main/python/network_validation/_data_collection_utils.py b/src/main/python/network_validation/_data_collection_utils.py new file mode 100644 index 00000000000..25420e1e44f --- /dev/null +++ b/src/main/python/network_validation/_data_collection_utils.py @@ -0,0 +1,435 @@ +import os + +import geopandas as gpd +import pandas as pd + +def collect_census_data(state_fips_code, county_fips_codes, year, census_data_file, geo_level='county'): + """ + Collect census data at specified geographic level (county, tract, or CBG). 
+ + Parameters + ---------- + state_fips_code : str + FIPS code for the state + county_fips_codes : list or str + List of county FIPS codes or comma-separated string + year : int + Census year + census_data_file : str + Path to save the CSV output + geo_level : str + Geographic level for data collection: 'county', 'tract', or 'cbg' + Default is 'county' + + Returns + ------- + pandas.DataFrame + DataFrame containing population data for the specified geographic level + """ + # Validate geo_level parameter + valid_levels = ['county', 'tract', 'cbg'] + if geo_level.lower() not in valid_levels: + raise ValueError(f"Invalid geo_level '{geo_level}'. Must be one of: {', '.join(valid_levels)}") + + geo_level = geo_level.lower() + + # Check if the output file already exists + if os.path.exists(census_data_file): + print(f"Loading existing {geo_level} data from {census_data_file}") + return pd.read_csv(census_data_file, dtype={'GEOID': str}) + + # Get Census API key from file + api_key_path = os.path.expanduser("~/.census_api_key") + try: + with open(api_key_path, 'r') as f: + census_api_key = f.read().strip() + print(f"Your Census API key is [{census_api_key}]") + except FileNotFoundError: + raise FileNotFoundError( + f"Census API key file not found at {api_key_path}. Please create this file with your API key.") + + if not census_api_key: + raise ValueError("Census API key is empty. 
Please check your API key file.") + + print(f"Collecting {geo_level.upper()} data for year {year}...") + + # Initialize the Census API + from census import Census + c = Census(census_api_key, year=year) + + # Convert list of county FIPS to comma-separated string if it's a list + if isinstance(county_fips_codes, list): + county_fips_string = ','.join(county_fips_codes) + else: + county_fips_string = county_fips_codes + + print(f"Downloading population data for {geo_level}s...") + + try: + # Different API calls based on geographic level + if geo_level == 'county': + census_data = c.acs5.state_county( + fields=('NAME', 'B01003_001E'), # B01003_001E is total population + state_fips=state_fips_code, + county_fips=county_fips_string + ) + elif geo_level == 'tract': + census_data = c.acs5.state_county_tract( + fields=('NAME', 'B01003_001E'), + state_fips=state_fips_code, + county_fips=county_fips_string, + tract='*' # Request all tracts + ) + elif geo_level == 'cbg': + census_data = c.acs5.state_county_blockgroup( + fields=('NAME', 'B01003_001E'), + state_fips=state_fips_code, + county_fips=county_fips_string, + blockgroup='*' # Request all block groups + ) + else: + raise ValueError(f"Invalid geo_level '{geo_level}'. 
Must be one of: {', '.join(valid_levels)}") + + # Create a DataFrame from the census data + df = pd.DataFrame(census_data) + + # Rename columns for clarity + df = df.rename(columns={'B01003_001E': 'population', 'NAME': 'name'}) + + # Create GEOID based on geographic level + if geo_level == 'county': + df['GEOID'] = df['state'] + df['county'] + elif geo_level == 'tract': + df['GEOID'] = df['state'] + df['county'] + df['tract'] + elif geo_level == 'cbg': + df['GEOID'] = df['state'] + df['county'] + df['tract'] + df['block group'] + + # Convert population to numeric + df['population'] = pd.to_numeric(df['population'], errors='coerce') + + # Save the raw census data + if census_data_file: + df.to_csv(census_data_file, index=False) + print(f"{geo_level.capitalize()} population data saved to {census_data_file}") + + return df + + except Exception as e: + print(f"Error downloading Census data: {e}") + raise + + +def download_tract_census_data(state_fips_code, county_fips_codes, year, census_data_file): + """ + Download census tract population data from the Census Bureau's ACS 5-year estimates. 
+ + Parameters + ---------- + state_fips_code : str + FIPS code for the state + county_fips_codes : list + List of county FIPS codes + year : int + Reference year for population estimates (July 1st reference date) + census_data_file: str + Path to the CSV file where population data will be saved + + Returns + ------- + pandas.DataFrame + DataFrame containing population data for census tracts + """ + from cenpy import products + if not os.path.exists(census_data_file): + # Connect to Census API + try: + conn = products.APIConnection(f"ACSDT5Y{year}") + # Get population data for tracts + pop_data = None + for county_fips in county_fips_codes: + tract_data = conn.query( + ['B01003_001E'], # Total population estimate + geo_unit='tract', + geo_filter={ + "state": state_fips_code, + "county": county_fips + } + ) + pop_data = pd.concat([pop_data, tract_data]) if pop_data is not None else tract_data + + # Rename columns + pop_data = pop_data.rename(columns={'B01003_001E': 'population'}) + + # Create GEOID by combining state, county, and tract + pop_data['GEOID'] = (pop_data['state'] + pop_data['county'] + pop_data['tract']).astype(str) + + # Convert population to numeric + pop_data['population'] = pd.to_numeric(pop_data['population'], errors='coerce') + pop_data.to_csv(census_data_file, index=False) + + except Exception as e: + print(f"Failed to retrieve population data: {e}") + raise + else: + pop_data = pd.read_csv(census_data_file, dtype={'GEOID': str}) + + return pop_data + + +def collect_tract_boundaries(state_fips_code, county_fips_codes, year): + """ + Download census tract boundaries from TIGER/Line shapefiles. 
def collect_geographic_boundaries(state_fips_code, county_fips_codes, year, area_name, geo_level, work_dir):
    """
    Return the study-area boundaries for the requested geographic level in WGS84.

    The result is cached as ``{work_dir}/{area_name}_{geo_level}_{year}_wgs84.geojson``;
    when that file already exists it is read back and returned without any download.

    Parameters
    ----------
    state_fips_code : str
        FIPS code for the state
    county_fips_codes : list
        County FIPS codes to keep; rows are kept when any column whose name
        starts with ``COUNTYFP`` holds one of these codes
    year : int
        Reference year passed to the boundary source
    area_name : str
        Study-area label used in the cache file name
    geo_level : str
        One of 'county', 'cbg', 'taz' or 'tract'
    work_dir : str
        Directory holding the cached GeoJSON

    Returns
    -------
    geopandas.GeoDataFrame
        Selected boundaries reprojected to EPSG:4326

    Raises
    ------
    ValueError
        If ``geo_level`` is not supported (only checked when no cached file exists)
    """
    study_area_boundary_geo_path = f"{work_dir}/{area_name}_{geo_level}_{year}_wgs84.geojson"
    if os.path.exists(study_area_boundary_geo_path):
        return gpd.read_file(study_area_boundary_geo_path)

    supported_levels = ('county', 'cbg', 'taz', 'tract')
    if geo_level not in supported_levels:
        raise ValueError(
            f"Unsupported geographic level '{geo_level}'. "
            f"Choose one of: {', '.join(supported_levels)}."
        )

    output_dir = os.path.dirname(study_area_boundary_geo_path)

    # pygris is only needed for the county and block-group levels, so import it lazily.
    if geo_level == 'county':
        from pygris import counties
        geo_data = counties(state=state_fips_code, year=year, cb=True, cache=True)
    elif geo_level == 'cbg':
        from pygris import block_groups
        geo_data = block_groups(state=state_fips_code, year=year, cb=True, cache=True)
    elif geo_level == 'taz':
        geo_data = collect_taz_boundaries(state_fips_code, year, output_dir)
    else:  # 'tract'
        geo_data = collect_tract_boundaries(state_fips_code, county_fips_codes, year)

    # Keep rows whose county code (in any COUNTYFP* column) belongs to the study area.
    countyfp_columns = [col for col in geo_data.columns if col.startswith('COUNTYFP')]
    mask = geo_data[countyfp_columns].apply(lambda x: x.isin(county_fips_codes)).any(axis=1)
    selected_geo = geo_data[mask]

    # Make sure the cache directory exists before writing into it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    selected_geo_wgs84 = selected_geo.to_crs(epsg=4326)
    selected_geo_wgs84.to_file(study_area_boundary_geo_path, driver="GeoJSON")
    return selected_geo_wgs84
def download_taz_shapefile(state_fips_code, year, output_dir):
    """
    Download the TAZ shapefile ZIP archive for a given state-level FIPS code.

    Parameters:
    - state_fips_code: String or integer state-level FIPS code (zero-padded to 2 characters).
    - year: Year used to build the archive file name ``tl_{year}_{fips}_taz10.zip``.
    - output_dir: Directory to save the downloaded ZIP file (created if missing).

    Returns:
    - Path of the saved ZIP file.

    Raises:
    - requests.RequestException: if the download fails. The error is re-raised so
      callers never receive a path to a file that was not written.
    """
    import requests

    # Ensure the FIPS code is a string, padded to 2 characters
    fips_code_str = str(state_fips_code).zfill(2)

    # Construct the download URL
    # NOTE(review): the directory is pinned to the TIGER2010 release while the file
    # name uses `year` — confirm that years other than 2010 are expected to resolve.
    base_url = "https://www2.census.gov/geo/tiger/TIGER2010/TAZ/2010/"
    filename = f"tl_{year}_{fips_code_str}_taz10.zip"
    download_url = base_url + filename

    # Make the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Full path for saving the file; write to a side file first so an interrupted
    # download is never mistaken for a complete, cached archive.
    output_path = os.path.join(output_dir, filename)
    partial_path = output_path + ".part"

    # Start the download
    print(f"Downloading TAZ shapefile for FIPS code {state_fips_code} from {download_url}")
    try:
        response = requests.get(download_url, timeout=60)
        response.raise_for_status()  # This will check for errors

        # Write the content of the response to a ZIP file
        with open(partial_path, 'wb') as file:
            file.write(response.content)
        os.replace(partial_path, output_path)

        print(f"File saved to {output_path}")

    except requests.RequestException as e:
        print(f"Error downloading the file: {e}")
        raise

    return output_path
+ min_density_per_km2: float, optional + Minimum population density threshold (people per km²) + density_geo_file: str + Path to the CSV file containing density based geometry + + Returns + ------- + geopandas.GeoDataFrame + Filtered geographic boundaries based on density threshold + """ + # Check if filtered boundaries already exist + if os.path.exists(density_geo_file): + print(f"Loading existing {geo_level} boundaries...") + filtered_geo = gpd.read_file(density_geo_file) + print(f"✓ Loaded {len(filtered_geo)} {geo_level}s from existing file") + return filtered_geo + + # If not, we need to collect and process the data + print(f"Processing {geo_level} boundaries for density analysis...") + + # Ensure GEOID column has consistent type for merging + geo_data['GEOID'] = geo_data['GEOID'].astype(str) + + # Merge boundaries with population data + geo_with_pop = geo_data.merge(pop_data, on='GEOID') + + # Calculate area and density + geo_with_pop['area_sqkm'] = ( + geo_with_pop.to_crs(epsg=utm_epsg) + .geometry.area / 1000000 # Convert m² to km² + ) + geo_with_pop['density_per_km2'] = geo_with_pop['population'] / geo_with_pop['area_sqkm'] + + # Calculate percentile ranks + geo_with_pop['density_percentile'] = ( + geo_with_pop['density_per_km2'].rank(pct=True) * 100 + ).round(1) + + # Print density analysis summary + print("\nPopulation Density Analysis:") + print("==========================") + + # Density summary + print(f"\n{geo_level.capitalize()} Density Summary (people/km²):") + print("--------------------------------") + stats = geo_with_pop['density_per_km2'].describe() + print(f"Mean density: {stats['mean']:,.1f}") + print(f"Median density: {stats['50%']:,.1f}") + print(f"Standard deviation: {stats['std']:,.1f}") + print(f"Minimum density: {stats['min']:,.1f}") + print(f"Maximum density: {stats['max']:,.1f}") + + # Density distribution + print("\nDensity Distribution Quartiles:") + print("----------------------------") + for q in [0.25, 0.5, 0.75]: + 
print(f"{int(q * 100)}th percentile: {geo_with_pop['density_per_km2'].quantile(q):,.1f}") + + # Filter by minimum density if specified + if min_density_per_km2 > 0: + print(f"\nFiltering {geo_level}s by minimum density: {min_density_per_km2:,.1f} people/km²") + + # Apply density filter + filtered_geo = geo_with_pop[geo_with_pop["density_per_km2"] >= min_density_per_km2] + + # Print selection results + print(f"\nSelection Results:") + print("----------------") + print(f"Selected {len(filtered_geo)} out of {len(geo_with_pop)} {geo_level}s") + print(f"Total population in selected {geo_level}s: {filtered_geo['population'].sum():,}") + + # Calculate percentage of total population + total_population = geo_with_pop['population'].sum() + if total_population > 0: + population_percentage = (filtered_geo['population'].sum() / total_population * 100) + print(f"Percentage of total population: {population_percentage:.1f}%\n") + else: + print(f"Warning: Total population is zero, cannot calculate percentage\n") + else: + # If no density filter, use all areas + filtered_geo = geo_with_pop + print(f"\nUsing all {len(filtered_geo)} {geo_level}s (no density filter applied)") + + # Ensure output is in WGS84 for consistency + filtered_geo = filtered_geo.to_crs(epsg=4326) + + # Save to file for future use + filtered_geo.to_file(density_geo_file, driver="GeoJSON") + print(f"✓ Saved filtered {geo_level} boundaries to {density_geo_file}") + + return filtered_geo \ No newline at end of file diff --git a/src/main/python/network_validation/convert_network_csv_into_geojson.py b/src/main/python/network_validation/convert_network_csv_into_geojson.py new file mode 100644 index 00000000000..03f640ae134 --- /dev/null +++ b/src/main/python/network_validation/convert_network_csv_into_geojson.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +import pandas as pd +import geopandas as gpd +from shapely.geometry import LineString, Point +import os +import logging +import argparse + + +def 
def validate_network_file(network_df):
    """Check that the network table carries every column the converter relies on.

    Raises ``ValueError`` listing the absent column names (in the order they are
    expected) when at least one is missing; returns ``None`` otherwise.
    """
    expected = (
        'linkId', 'linkLength', 'linkFreeSpeed', 'linkCapacity',
        'numberOfLanes', 'linkModes', 'attributeOrigId', 'attributeOrigType',
        'fromNodeId', 'toNodeId', 'fromLocationX', 'fromLocationY',
        'toLocationX', 'toLocationY',
    )

    absent = []
    for name in expected:
        if name not in network_df.columns:
            absent.append(name)

    if not absent:
        return

    raise ValueError(f"Missing required columns in network file: {absent}")
Features remaining: {len(network_filtered):,}") + + # Create GeoDataFrame with projected CRS + gdf = gpd.GeoDataFrame( + network_filtered, + geometry=[ + LineString([Point(row.fromLocationX, row.fromLocationY), + Point(row.toLocationX, row.toLocationY)]) + for idx, row in network_filtered.iterrows() + ], + crs=f"EPSG:{projected_crs_epsg}" + ) + + # Remove coordinate columns as they're now in the geometry + gdf = gdf.drop(columns=['fromLocationX', 'fromLocationY', 'toLocationX', 'toLocationY']) + + # Convert to WGS84 for GeoJSON output + gdf_wgs84 = gdf.to_crs(epsg=4326) + + # Create output path and save file + output_dir = os.path.dirname(network_file) + output_file = os.path.join(output_dir, f"{network_name}.geojson") + + # Save to GeoJSON + gdf_wgs84.to_file(output_file, driver='GeoJSON') + + # Log statistics + logging.info(f"\n[NETWORK] Network statistics:") + logging.info(f"Total features: {len(gdf_wgs84):,}") + logging.info(f"Total network length: {gdf_wgs84['linkLength'].sum() / 1000:.2f} km") + logging.info(f"Unique road types: {gdf_wgs84['attributeOrigType'].nunique()}") + + # Road type distribution + logging.info("\nTop 5 road types distribution:") + road_type_dist = gdf_wgs84['attributeOrigType'].value_counts().head() + for road_type, count in road_type_dist.items(): + logging.info(f" {road_type}: {count:,} links") + + logging.info(f"\n[OUTPUT] GeoJSON file saved to: {output_file}") + return output_file + + except Exception as e: + logging.error(f"Error converting network to GeoJSON: {str(e)}") + raise + + +def parse_arguments(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description='Convert network CSV to GeoJSON format.' 
+ ) + parser.add_argument( + 'network_file', + help='Path to the network CSV file (can be gzipped)' + ) + parser.add_argument( + '--crs', + type=int, + default=32048, + help='EPSG code for the projected CRS (default: 32048)' + ) + return parser.parse_args() + + +def main(): + # Parse command line arguments + args = parse_arguments() + network_file = os.path.expanduser(args.network_file) + network_dir = os.path.dirname(network_file) + network_name = os.path.splitext(os.path.splitext(os.path.basename(network_file))[0])[0] + + # Setup logging + log_file = os.path.join(network_dir, f'network_to_geojson_{network_name}.log') + setup_logging(log_file) + + # Check if network file exists + if not os.path.exists(network_file): + logging.error(f"Network file not found: {network_file}") + return 1 + + try: + convert_network_to_geojson(network_file, projected_crs_epsg=args.crs) + logging.info("Conversion completed successfully") + return 0 + except Exception as e: + logging.error(f"Conversion failed: {str(e)}") + return 1 + + +if __name__ == "__main__": + exit(main()) \ No newline at end of file diff --git a/src/main/python/network_validation/download_osm_network.py b/src/main/python/network_validation/download_osm_network.py new file mode 100644 index 00000000000..ac392eb60c2 --- /dev/null +++ b/src/main/python/network_validation/download_osm_network.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +""" +Download and prepare OSM network data. 
def main():
    """Download the OSM network for the configured study area and export it.

    Writes GraphML, pickle, GeoPackage, OSM XML, OSM PBF and GeoJSON files into
    ``{work_dir}/network/{config_name}``, then scans the network directories for
    ways. The external ``osmium`` and ``ogr2ogr`` tools are invoked with
    argument lists (no shell) so paths containing spaces or shell
    metacharacters are passed through verbatim.
    """
    area = "sfbay"  # Options: sfbay, seattle
    study_area_config = get_area_config(area)
    study_area_config["graph_layers"]["residential"]["min_density_per_km2"] = 5500  # 2855 for sfbay, 412 for seattle

    # Generate configuration name and prepare directory
    config_name = generate_network_name(study_area_config)
    network_dir = f'{study_area_config["work_dir"]}/network/{config_name}'
    os.makedirs(network_dir, exist_ok=True)

    # Define output file paths
    graphml_network = f'{network_dir}/{config_name}.graphml'
    pkl_network = f'{network_dir}/{config_name}.pkl'
    gpkg_network = f'{network_dir}/{config_name}.gpkg'
    osm_network = f'{network_dir}/{config_name}.osm'
    pbf_network = f'{network_dir}/{config_name}.osm.pbf'
    geojson_network = f'{network_dir}/{config_name}.osm.geojson'

    print(f'Downloading and preparing OSM-based {config_name} network...')
    g_network = download_and_prepare_osm_network(study_area_config)

    # Check for duplicate edge IDs
    nodes, edges = ox.graph_to_gdfs(g_network)
    has_duplicates, duplicate_info = check_duplicate_edge_ids(edges, 'edge_id')

    if has_duplicates:
        dup_counts, _dup_examples = duplicate_info
        print(f"\nFound {sum(dup_counts.values())} duplicate edge IDs")

    # Save GraphML and PKL formats
    ox.save_graphml(g_network, filepath=graphml_network)
    print(f"GRAPHML Network saved to '{graphml_network}'.")

    with open(pkl_network, 'wb') as f:
        pickle.dump(g_network, f)
    print(f"PKL Network saved to '{pkl_network}'.")

    # Check for invalid coordinates
    has_invalid, invalid_nodes = check_invalid_coordinates(g_network)
    if has_invalid:
        print(f"WARNING: Found {len(invalid_nodes)} nodes with invalid coordinates.")
    else:
        print("✓ All node coordinates are valid.")

    # Extract nodes and edges as GeoDataFrames and verify CRS
    nodes, edges = ox.graph_to_gdfs(g_network)
    if nodes.crs != edges.crs:
        print("\nWARNING: Nodes and edges have different CRS!")
        print(f"Nodes CRS: {nodes.crs}")
        print(f"Edges CRS: {edges.crs}")

    # Save GPKG Network
    print(f"Converting GraphML Network to GPKG Network...")
    ox.save_graph_geopackage(g_network, filepath=gpkg_network)
    print(f"GPKG Network saved to '{gpkg_network}'.")

    # Create OSM Network
    print(f"Creating OSM Network...")
    nodes, edges = ox.graph_to_gdfs(g_network)
    edges = edges.drop([
        'u_original', 'v_original', 'merged_edges', 'osmid'
    ], axis=1, errors='ignore')
    nodes = nodes.drop([
        'osmid_original'
    ], axis=1, errors='ignore')

    g_osm = ox.graph_from_gdfs(nodes, edges, graph_attrs=g_network.graph)
    save_graph_xml(
        g_osm,
        filepath=osm_network,
        edge_tags=[
            'highway', 'lanes', 'maxspeed', 'name', 'oneway', 'length',
            'tunnel', 'bridge', 'junction', 'edge_id', 'access', 'osm_id'
        ],
        edge_tag_aggs=[('length', 'sum')]
    )
    print(f"OSM Network saved to '{osm_network}'.")

    # Convert to PBF and GeoJSON formats.
    # Argument lists (shell=False) keep file paths from being re-parsed by a shell.
    osmium_cmd = [
        "osmium", "cat", osm_network,
        "-o", pbf_network,
        "--overwrite",
        "--output-format", "pbf,compression=zlib",
    ]
    subprocess.run(osmium_cmd, check=True)
    print(f"OSM PBF File saved to '{pbf_network}'")

    ogr2ogr_cmd = ["ogr2ogr", "-f", "GeoJSON", geojson_network, pbf_network, "lines"]
    subprocess.run(ogr2ogr_cmd, check=True)
    print(f"OSM GEOJSON File saved to '{geojson_network}'")

    # Scan network directories for ways
    work_dir = study_area_config["work_dir"]
    scan_network_directories_for_ways(os.path.expanduser(f'{work_dir}/network'))
Current memory usage: {current_memory:.2f} MB") + + df = pd.concat(chunks, ignore_index=True) + else: + print("Loading file into memory...") + df = pd.read_csv(file_path) + + # Final memory usage + final_memory = process.memory_info().rss / (1024 * 1024) + memory_increase = final_memory - initial_memory + + # Print statistics + print(f"CSV loaded successfully in {time.time() - start_time:.2f} seconds") + print(f"Rows: {len(df)}, Columns: {len(df.columns)}") + print(f"Final memory usage: {final_memory:.2f} MB (increased by {memory_increase:.2f} MB)") + + # Display first few rows and column info + print("\nFirst 5 rows:") + print(df.head()) + + print("\nColumn information:") + print(df.dtypes) + + return df + + except Exception as e: + print(f"Error loading CSV: {str(e)}") + return None + + +if __name__ == "__main__": + # Testing H5 Data + # url = "https://console.cloud.google.com/storage/browser/_details/beam-core-outputs/urbansim-inputs/custom_mpo_06197001_model_data_2017.h5;tab=live_object?project=beam-core" + # url = "https://storage.googleapis.com/beam-core-outputs/urbansim-inputs/custom_mpo_06197001_model_data_2017.h5" + # h5_path = os.path.expanduser("~/Workspace/Simulation/sfbay/urbansim/custom_mpo_06197001_model_data.h5") + # download_h5_data(url, h5_path) + + # Loading an events file + # Replace with your CSV file path + file_path = os.path.expanduser("~/Workspace/Models/pilates/0.events.csv.gz") + out_path = os.path.expanduser("~/Downloads/0.events.p3048964.csv") + #300181 + df = load_heavy_csv(file_path) + + if df is not None: + # Perform additional operations with the dataframe here + print("\nMemory usage of DataFrame:") + print(f"{df.memory_usage(deep=True).sum() / (1024 * 1024):.2f} MB") + + diff --git a/src/main/python/network_validation/network_validation.ipynb b/src/main/python/network_validation/network_validation.ipynb index 4b751b5919c..0f7903fd817 100644 --- a/src/main/python/network_validation/network_validation.ipynb +++ 
b/src/main/python/network_validation/network_validation.ipynb @@ -259,7 +259,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.7" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/src/main/python/network_validation/osm_utils.py b/src/main/python/network_validation/osm_utils.py new file mode 100644 index 00000000000..7b688e36df0 --- /dev/null +++ b/src/main/python/network_validation/osm_utils.py @@ -0,0 +1,1640 @@ +import hashlib +import os +import sys +from collections import Counter, defaultdict +from statistics import mean +from statistics import median + +import geopandas as gpd +import networkx as nx +import osmium +import osmnx as ox +import pandas as pd +import pyproj +import shapely.geometry +from osmnx import settings +from osmnx import truncate +from shapely.ops import unary_union + +from _data_collection_utils import collect_census_data +from _data_collection_utils import collect_geographic_boundaries +from _data_collection_utils import filter_boundaries_by_density + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) + +# Go up to the parent directory that contains the 'python' directory +# If your file is in /path/to/python/freight/frism_to_beam_freight_plans.py +# This will add /path/to to sys.path +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + + +def process_ferry_edges(ferry_graph) -> nx.MultiDiGraph: + """Process ferry edges to make them compatible with car network""" + if ferry_graph.number_of_edges() == 0: + print("No ferry edges found in the graph.") + return nx.MultiDiGraph() + + # Extract nodes and edges + ferry_nodes, ferry_edges = ox.graph_to_gdfs(ferry_graph) + print(f"Total ferry edges: {len(ferry_edges)}") + + # Print available columns to debug + print(f"Available columns: {ferry_edges.columns.tolist()}") + + # Create default masks - assume access is allowed unless 
def convert_weight(value: float, from_unit: str, to_unit: str) -> float:
    """Translate a weight from one unit into another.

    Supported pairs are lbs<->kg and tons<->kg directly, plus lbs<->tons routed
    through kilograms. Identical units return ``value`` untouched. Any other
    pair raises ``ValueError``.
    """
    if from_unit == to_unit:
        return value

    # Multipliers keyed as "<source>_to_<destination>"
    factors = {
        "lbs_to_kg": 0.453592,
        "kg_to_lbs": 2.20462,
        "tons_to_kg": 1000,
        "kg_to_tons": 0.001,
    }

    direct = factors.get(f"{from_unit}_to_{to_unit}")
    if direct is not None:
        return value * direct

    # No direct factor: go through kilograms for the lbs/tons pair
    route = (from_unit, to_unit)
    if route == ("lbs", "tons"):
        return value * factors["lbs_to_kg"] * factors["kg_to_tons"]
    if route == ("tons", "lbs"):
        return value * factors["tons_to_kg"] * factors["kg_to_lbs"]

    raise ValueError(f"Unsupported conversion from {from_unit} to {to_unit}")
def standardize_oneway(value):
    """
    Standardize oneway tag to "yes", "reverse", or "no" strings to match MATSim expectations.
    - "yes" for forward direction oneway
    - "reverse" for backward direction oneway
    - "no" for bidirectional

    Accepts strings (optionally semicolon-separated), booleans, integers and
    lists/tuples/sets of those. Every non-empty component must agree for "yes"
    or "reverse" to be returned; missing, empty, mixed or unrecognised input
    yields "no".

    Parameters:
    -----------
    value : any
        The oneway tag value

    Returns:
    --------
    str
        "yes", "reverse" or "no" (never None)
    """
    # Tokens that explicitly mean forward oneway
    yes_tokens = {'yes', 'true', '1'}

    # Tokens that explicitly mean reverse oneway
    reverse_tokens = {'-1', 'reverse'}

    def tokens_of(item):
        """Flatten one raw value into lowercase tokens, dropping missing/empty entries."""
        if item is None:
            return []
        if isinstance(item, float) and item != item:  # NaN marks a missing tag
            return []
        if isinstance(item, (list, tuple, set, frozenset)):
            flat = []
            for element in item:
                flat.extend(tokens_of(element))
            return flat
        # str() also covers bool/int and numpy scalars: True -> "true", -1 -> "-1"
        text = str(item).lower().strip()
        return [part.strip() for part in text.split(';') if part.strip()]

    tokens = tokens_of(value)

    # Nothing usable (None, "", NaN, empty list): treat as bidirectional.
    # Guarding here avoids the vacuous truth of all() on an empty sequence.
    if not tokens:
        return "no"

    if all(token in yes_tokens for token in tokens):
        return "yes"
    if all(token in reverse_tokens for token in tokens):
        return "reverse"

    # Explicit "no"/"false"/"0", mixed values, or anything we can't interpret
    return "no"
+ + Parameters: + ----------- + value : any + The motor_vehicle tag value + + Returns: + -------- + str + "no" if motor vehicles are restricted (no, false, 0, private) + "yes" otherwise + """ + # Define restrictive values + restrictive_values = {"no", "false", "0"} + + # If value is None, NaN, or empty, assume motor vehicles are allowed + if value is None or pd.isna(value) or (isinstance(value, str) and not value.strip()): + return "yes" + + # Convert to string and lowercase for consistent processing + if not isinstance(value, str): + value = str(value) + + value = value.lower().strip() + + import re + # Handle special cases with multiple values (separated by semicolons or vertical bars) + if ';' in value or '|' in value: + # Split by either semicolon or vertical bar + parts = re.split(r'[;|]+', value) + parts = [p.strip() for p in parts if p.strip()] + + # If any part is in the restrictive values, the overall value is "no" + if any(p in restrictive_values for p in parts): + return "no" + else: + return "yes" + + # Check if the value is in the restrictive set + if value in restrictive_values: + return "no" + + # All other values indicate some form of access + return "yes" + + +def standardize_maxspeed(value, default_kph=None): + """ + Standardize maxspeed values and return them in the format "25 mph". 
+ + Parameters: + ----------- + value : any + The maxspeed tag value + default_kph : int, optional + Default speed in kph to use if the value can't be parsed + + Returns: + -------- + str or None + Speed in format "XX mph", or None if the value can't be parsed and no default is provided + """ + if value is None or pd.isna(value) or (isinstance(value, str) and not value.strip()): + if default_kph is not None: + return f"{round(default_kph / 1.60934)} mph" # Convert kph to mph + return None + + # Convert to string for processing + if not isinstance(value, str): + value = str(value) + + value = value.lower().strip() + + # Handle special cases + if value == "signals" or value == "none" or value == "variable": + if default_kph is not None: + return f"{round(default_kph / 1.60934)} mph" # Convert kph to mph + return None + + import re + # Try to extract numeric value and unit + match = re.match(r'^(\d+(?:\.\d+)?)\s*(mph|kmh|km/h|kph)?$', value) + if match: + speed_val = float(match.group(1)) + unit = match.group(2) if match.group(2) else "kph" # Default to kph if no unit + + # Convert to mph if necessary + if unit in ["kmh", "km/h", "kph"]: + speed_mph = round(speed_val / 1.60934) # Convert kph to mph + else: + # Already in mph + speed_mph = round(speed_val) + + return f"{speed_mph} mph" + + # If we can't parse the value and have a default + if default_kph is not None: + return f"{round(default_kph / 1.60934)} mph" # Convert kph to mph + + # If we can't parse the value and don't have a default + return None + + +def standardize_access(value): + """ + Standardize access tag to "yes" or "no" strings, focusing on a defined set of restrictive values. + + Parameters: + ----------- + value : any + The access tag value + + Returns: + -------- + str + "no" if access is restricted (no, private, forestry, permit, etc.) 
+ "yes" otherwise + """ + # Define restrictive values - values that indicate restricted access + restrictive_values = {"no", "false", "0"} + + # If value is None, NaN, or empty, assume access is allowed + if value is None or pd.isna(value) or (isinstance(value, str) and not value.strip()): + return "yes" + + # Convert to string and lowercase for consistent processing + if not isinstance(value, str): + value = str(value) + + value = value.lower().strip() + + import re + # Handle special cases with multiple values (separated by semicolons or vertical bars) + if ';' in value or '|' in value: + # Split by either semicolon or vertical bar + parts = re.split(r'[;|]+', value) + parts = [p.strip() for p in parts if p.strip()] + + # If any part is in the restrictive values, the overall value is "no" + if any(p in restrictive_values for p in parts): + return "no" + else: + return "yes" + + # Check if the value is in the restrictive set + if value in restrictive_values: + return "no" + + # All other values (yes, permissive, etc.) 
indicate general access + return "yes" + + +def standardize_hgv(value): + """ + Standardize HGV access values to boolean (True/False) + Returns True if HGVs are allowed, False if they are not + """ + if not value: + return True # Default to allowed if no value + + # Values that indicate HGV prohibition + restrictive_values = {"no", "false", "0"} + + # Handle boolean inputs + if isinstance(value, bool): + return value + + # Handle semicolon-separated string values + if isinstance(value, str) and ';' in value: + # If any part is "no", the whole is restricted + for part in value.split(';'): + if part.strip().lower() in restrictive_values: + return False + return True + + # Handle list case + if isinstance(value, list): + if not value: + return True + # If any value is "no", the whole is restricted + for v in value: + if str(v).strip().lower() in restrictive_values: + return False + return True + + # Handle single string value + if isinstance(value, str): + return str(value).strip().lower() not in restrictive_values + + # For any other case, convert to string and check + return str(value).strip().lower() not in restrictive_values + + +def process_tags(_g: nx.MultiDiGraph, config: dict) -> nx.MultiDiGraph: + """Process vehicle classifications based on FHWA weight classes.""" + print("Processing vehicle classifications...") + + # Get weight limits and unit from config + weight_config = config["weight_limits"] + target_unit = weight_config["unit"] + mdv_max = weight_config["mdv_max"] + hdv_max = weight_config["hdv_max"] + + # Get graph data while preserving MultiIndex + nodes, edges = ox.graph_to_gdfs(_g) + + # Standardize tags + edges['oneway'] = edges['oneway'].apply(standardize_oneway) + edges['motor_vehicle'] = edges['motor_vehicle'].apply(standardize_motor_vehicle) + edges['maxspeed'] = edges['maxspeed'].apply(standardize_maxspeed) + edges['access'] = edges['access'].apply(standardize_access) + # Initialize hgv and mdv as True by default if they don't exist + 
edges["mdv"] = True + if "hgv" not in edges.columns: + edges["hgv"] = True + edges["hgv"] = edges["hgv"].apply(standardize_hgv) + + # Copy HGV weight restrictions if present + if "maxweight:hgv" in edges.columns: + hgv_mask = ~edges["maxweight:hgv"].isna() + if hgv_mask.any(): + edges.loc[hgv_mask, "maxweight"] = edges.loc[hgv_mask, "maxweight:hgv"].copy() + + if "maxweight" in edges.columns: + print("Processing weight restrictions...") + # Convert weights to standard unit specified in config + edges["maxweight"] = edges["maxweight"].apply(lambda x: standardize_weight(x, target_unit)) + + # Update hgv and mdv based on weight restrictions + # Medium-duty vehicles are restricted when weight is below MDV limit + mdv_restricted_mask = edges["maxweight"].notna() & (edges["maxweight"] <= mdv_max) + edges.loc[mdv_restricted_mask, "mdv"] = False + + # Heavy-duty vehicles are restricted when weight is below HDV limit + # Create a mask for MDVs being restricted + mdv_is_restricted = edges["mdv"] == False + # Combine masks properly + hdv_restricted_mask = mdv_is_restricted | (edges["maxweight"].notna() & (edges["maxweight"] <= hdv_max)) + edges.loc[hdv_restricted_mask, "hgv"] = False + + # Process other restrictions like maxlength + if "maxlength" in edges.columns: + # If maxlength is set, assume heavy vehicles are restricted + length_restricted_mask = ~edges["maxlength"].isna() + edges.loc[length_restricted_mask, "hgv"] = False + + # Ensure hgv, mdv and oneway are strictly boolean + edges["hgv"] = edges["hgv"].astype(bool) + edges["mdv"] = edges["mdv"].astype(bool) + + # Convert back to MultiDiGraph + g_updated = ox.graph_from_gdfs(nodes, edges) + + return g_updated + + +def create_unique_edge_id(u, v, osmid, k=None): + """ + Create a unique edge ID by combining start node, end node, and osmid. 
+ + Parameters: + ----------- + u : node ID of the edge's source + v : node ID of the edge's target + osmid : original OSM way ID + k : optional key for MultiDiGraphs (default: None) + + Returns: + -------- + str : A unique edge identifier + """ + # Handle the case where osmid might be a list + if isinstance(osmid, list): + osmid_str = '_'.join(map(str, osmid)) + else: + osmid_str = str(osmid) + + # Include the key if provided (for MultiDiGraphs) + if k is not None: + unique_id = f"{u}_{v}_{k}_{osmid_str}" + else: + unique_id = f"{u}_{v}_{osmid_str}" + + # Optionally hash it if you want a shorter fixed-length ID + hash_object = hashlib.md5(unique_id.encode()) + return hash_object.hexdigest()[:12] # 12 characters should be sufficient + + +def find_long_tags_in_gdf(gdf, element_type="elements"): + """ + Find columns and combinations of attributes that exceed 250 characters in a GeoDataFrame. + + Parameters: + ----------- + gdf : GeoDataFrame + The input GeoDataFrame (can be either nodes or edges) + element_type : str, optional + The type of elements being analyzed ("nodes" or "edges") for output messages + + Returns: + -------- + tuple + (long_tags, long_comb_tags) where: + - long_tags: dict of individual columns with values >= 250 characters + - long_comb_tags: dict of rows with combined attribute length >= 250 characters + """ + print(f"\nAnalyzing {element_type}...") + + # Find individual columns with values longer than 250 characters + long_tags = {} + for column in gdf.columns: + # Convert all values to strings and check their lengths + max_length = gdf[column].astype(str).str.len().max() + if max_length >= 250: + long_tags[column] = max_length + + # Print results for individual columns + if long_tags: + print(f"\nIndividual {element_type} columns with values >= 250 characters:") + for column, length in long_tags.items(): + print(f"Column '{column}': max length = {length} characters") + # Print an example of a long value + long_value_idx = 
gdf[column].astype(str).str.len().idxmax() + print(f"Example long value: {gdf[column].iloc[long_value_idx]}\n") + else: + print(f"No individual {element_type} columns found with values >= 250 characters") + + # Find combinations of attributes that exceed 250 characters + print(f"\nChecking {element_type} attribute combinations...") + # Get all rows where any combination of attributes might be long + long_comb_tags = {} + for idx, row in gdf.iterrows(): + comb_length = 0 + contributing_cols = [] + + for col in gdf.columns: + value = str(row[col]) + if len(value) > 0 and value.lower() != 'nan': # Skip empty or NaN values + value_length = len(value) + comb_length += value_length + if value_length > 0: # Only add if the value has length + contributing_cols.append({ + 'column': col, + 'length': value_length, + 'value': value + }) + + if comb_length >= 250: + long_comb_tags[idx] = { + 'total_length': comb_length, + 'contributing_columns': contributing_cols + } + + # Print results for combinations + if long_comb_tags: + print(f"\n{element_type.capitalize()} rows with combined attribute length >= 250 characters:") + for idx, info in long_comb_tags.items(): + print(f"\nRow {idx}:") + print(f"Total combined length: {info['total_length']} characters") + print("Contributing columns:") + for col_info in info['contributing_columns']: + print(f"- {col_info['column']}: length={col_info['length']} chars") + if col_info['length'] > 50: # Show value only if it's significantly long + print(f" Value: {col_info['value'][:50]}...") # Show first 50 chars + else: + print(f"No combinations of {element_type} attributes found exceeding 250 characters") + + return long_tags, long_comb_tags + + +def meters_to_degrees(lon, lat, utm_epsg, buffer_meters): + """ + Calculate the equivalent buffer distance in degrees for a given buffer in meters, + using a specified UTM projection for better precision. 
+ + Parameters: + ----------- + lon : float + Longitude coordinate (x) in WGS84 + lat : float + Latitude coordinate (y) in WGS84 + utm_epsg : int + The EPSG code for the UTM coordinate reference system (e.g., 26910 for UTM Zone 10N) + buffer_meters : float + Buffer distance in meters + + Returns: + -------- + float + Equivalent buffer distance in degrees + """ + # Create UTM CRS from EPSG code + utm_crs = f"EPSG:{utm_epsg}" + + # Create transformers + wgs84_to_utm = pyproj.Transformer.from_crs("EPSG:4326", utm_crs, always_xy=True) + utm_to_wgs84 = pyproj.Transformer.from_crs(utm_crs, "EPSG:4326", always_xy=True) + + # Convert coordinates to UTM + x_utm, y_utm = wgs84_to_utm.transform(lon, lat) + + # Calculate points at buffer distance in cardinal directions + east_utm = (x_utm + buffer_meters, y_utm) + north_utm = (x_utm, y_utm + buffer_meters) + + # Convert buffered points back to WGS84 + east_lon, east_lat = utm_to_wgs84.transform(*east_utm) + north_lon, north_lat = utm_to_wgs84.transform(*north_utm) + + # Calculate degree differences + lon_diff = abs(east_lon - lon) # East-West difference (longitude) + lat_diff = abs(north_lat - lat) # North-South difference (latitude) + + # Return the average as an approximation + # You could also return both separately if you need different buffers for lat/lon + return (lon_diff + lat_diff) / 2 + + +def to_convex_hull(input_data, utm_epsg, buffer_in_meters): + """ + Create a buffered convex hull from input data. 
+ + Parameters: + ----------- + input_data : GeoDataFrame, GeoSeries, or Shapely geometry + The input geographic data + utm_epsg : int + EPSG code for the UTM projection to use for accurate distance calculations + buffer_in_meters : float + Buffer distance in meters + + Returns: + -------- + Shapely geometry + The buffered convex hull + """ + # Handle different input types + if isinstance(input_data, gpd.GeoDataFrame): + # GeoDataFrame: get the convex hull of all geometries + convex_hull = input_data.geometry.unary_union.convex_hull + elif isinstance(input_data, gpd.GeoSeries): + # GeoSeries: get the convex hull of all geometries + convex_hull = input_data.unary_union.convex_hull + elif hasattr(input_data, 'geom_type'): + # Shapely geometry: get its convex hull + convex_hull = input_data.convex_hull + else: + raise TypeError("Input must be a GeoDataFrame, GeoSeries, or Shapely geometry") + + # Get centroid + lon = convex_hull.centroid.x + lat = convex_hull.centroid.y + + # Convert buffer distance + buffer_in_degrees = meters_to_degrees(lon, lat, utm_epsg, buffer_in_meters) + + # Buffer in degrees + buffered_convex_hull = convex_hull.buffer(buffer_in_degrees) + + return buffered_convex_hull + + +def adjust_and_add_graph(graphs, current_graph): + # Get nodes and edges of current graph + current_nodes, current_edges = ox.graph_to_gdfs(current_graph) + + # Collect all unique columns from existing graphs + existing_columns = set() + for existing_graph in graphs: + _, existing_edges = ox.graph_to_gdfs(existing_graph) + existing_columns.update(existing_edges.columns) + + # Add missing columns to current graph's edges + for col in existing_columns: + if col not in current_edges.columns: + current_edges[col] = "" + + # Also ensure existing graphs have columns from current graph + current_columns = set(current_edges.columns) + for i, existing_graph in enumerate(graphs): + existing_nodes, existing_edges = ox.graph_to_gdfs(existing_graph) + + columns_added = False + for col in 
current_columns: + if col not in existing_edges.columns: + existing_edges[col] = "" + columns_added = True + + # Only rebuild the graph if columns were added + if columns_added: + graphs[i] = ox.graph_from_gdfs(existing_nodes, existing_edges) + + # Add the graph to the list if it has edges + graphs.append(ox.graph_from_gdfs(current_nodes, current_edges)) + + +def median_lanes(values): + """ + Calculate median after converting string values to numbers. + Handles: + - Lists of values + - Semicolon-separated values + - Mixed numeric types + """ + # Initialize empty list for numeric values + numeric_values = [] + + # Handle case where values is already a single value, not an iterable + if isinstance(values, (int, float)): + return int(values) + elif isinstance(values, str): + values = [values] + + # Process each value in the iterable + for v in values: + # Skip None values + if v is None: + continue + + # Handle different types + if isinstance(v, (int, float)): + numeric_values.append(int(v)) + continue + + if not isinstance(v, str): + v = str(v) + + # Split by semicolon to handle multiple values + parts = v.split(';') + for part in parts: + part = part.strip() + try: + numeric_values.append(int(part)) + except (ValueError, TypeError): + # Skip non-numeric parts + continue + + if not numeric_values: + return None + return int(median(numeric_values)) + + +def most_restrictive_access(values): + """ + Returns the most restrictive access value from a list based on a predefined priority order. 
+ + Parameters: + ----------- + values : list + List of access values + + Returns: + -------- + str + The most restrictive access value, or None if no valid values + """ + # Define a priority order for access restrictions (from most to least restrictive) + priority = { + "no": 1, # Most restrictive + "private": 2, + "permit": 3, + "destination": 4, + "delivery": 5, + "customers": 6, + "forestry": 7, + "agricultural": 8, + "discouraged": 9, + "permissive": 10, + "yes": 11 # Least restrictive + } + + # Default priority for unknown values - place between "discouraged" and "permissive" + default_priority = 9.5 + + if not values: + return None + + # Process each value and find the most restrictive + most_restrictive = None + highest_priority = float('inf') # Lower number = higher priority + + for value in values: + if value is None or value == "nan" or pd.isna(value): + continue + + if isinstance(value, str): + value = value.strip().lower() + if not value or value == "nan": + continue + + # Get priority for this value + value_priority = priority.get(value, default_priority) + + # Update most restrictive if this has higher priority (lower number) + if value_priority < highest_priority: + most_restrictive = value + highest_priority = value_priority + + return most_restrictive + + +def bool_all(values): + """ + Returns False if any value is False, otherwise returns True. + Expects only boolean values (True or False). + + Parameters: + ----------- + values : list + List of boolean values + + Returns: + -------- + bool + False if any value is False, True otherwise + """ + if not values: + return None + + # If any value is False, return False + return all(values) + + +def mean_maxspeed(speed_values): + """ + Calculate the mean speed from a list of speed values in the format "XX mph". 
+ + Parameters: + ----------- + speed_values : list + List of speed values in format "XX mph" + + Returns: + -------- + str + Mean speed in format "XX mph", or None if no valid speeds found + """ + if not speed_values: + return None + + # Extract numeric values + speeds_mph = [] + import re + + for value in speed_values: + if not value or pd.isna(value): + continue + + # Convert to string if needed + if not isinstance(value, str): + value = str(value) + + # Extract the numeric part + match = re.match(r'^(\d+(?:\.\d+)?)\s*mph$', value.lower().strip()) + if match: + speeds_mph.append(float(match.group(1))) + + # Calculate mean if we have valid values + if speeds_mph: + mean_speed = mean(speeds_mph) + return f"{round(mean_speed)} mph" + + return None + + +def yes_no_all(values): + """ + Returns "no" if any value is "no", otherwise returns "yes". + Expects string values ("yes" or "no"). + + Parameters: + ----------- + values : list + List of string values ("yes" or "no") + + Returns: + -------- + str + "no" if any value is "no", "yes" otherwise + """ + if not values: + return None + + # If any value is "no", return "no" + return "no" if "no" in values else "yes" + + +def project_graph(G: nx.MultiDiGraph, to_crs=None, to_latlong=False) -> nx.MultiDiGraph: + """ + Project a graph from its current CRS to another. + + If `to_latlong` is True, this projects the graph to the coordinate + reference system defined by `settings.default_crs`. Otherwise it projects + it to the CRS defined by `to_crs`. If `to_crs` is `None`, it projects it + to the CRS of an appropriate UTM zone given `geometry`'s bounds. + + Parameters + ---------- + G + The graph to be projected. + to_crs + If None, project to an appropriate UTM zone. Otherwise project to + this CRS. + to_latlong + If True, project to `settings.default_crs` and ignore `to_crs`. + + Returns + ------- + G_proj + The projected graph. 
+ """ + if to_latlong: + to_crs = settings.default_crs + + # STEP 1: PROJECT THE NODES + gdf_nodes = ox.convert.graph_to_gdfs(G, edges=False) + + # project the nodes GeoDataFrame and extract the projected x/y values + gdf_nodes_proj = ox.projection.project_gdf(gdf_nodes, to_crs=to_crs) + gdf_nodes_proj["x"] = gdf_nodes_proj["geometry"].x + gdf_nodes_proj["y"] = gdf_nodes_proj["geometry"].y + to_crs = gdf_nodes_proj.crs + + # STEP 2: PROJECT THE EDGES + # Always get edges with geometry, regardless of whether the graph is simplified + gdf_edges = ox.convert.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True) + + # If edges don't have a CRS but do have geometry, assign the source CRS + if gdf_edges.crs is None and not gdf_edges.empty and 'geometry' in gdf_edges.columns: + # If we're unsure about the source CRS, use what we know from the nodes + source_crs = G.graph.get('crs', gdf_nodes.crs) + if source_crs is not None: + gdf_edges.crs = source_crs + print(f"Setting edge CRS to {source_crs} before projection") + + # Project the edges + gdf_edges_proj = ox.projection.project_gdf(gdf_edges, to_crs=to_crs) + + # Debug output to verify projection worked + if not gdf_edges_proj.empty and 'geometry' in gdf_edges_proj.columns: + sample_geom = gdf_edges_proj.iloc[0]['geometry'] + if sample_geom is not None: + print(f"Sample edge coordinate after projection: {next(iter(sample_geom.coords))}") + + # STEP 3: REBUILD GRAPH + # turn projected node/edge gdfs into a graph and update its CRS attribute + G_proj = ox.convert.graph_from_gdfs(gdf_nodes_proj, gdf_edges_proj, graph_attrs=G.graph) + G_proj.graph["crs"] = to_crs + + print(f"Projected graph with {len(G)} nodes and {len(G.edges)} edges") + + # Final verification + nodes_check, edges_check = ox.convert.graph_to_gdfs(G_proj) + print(f"Verified: Nodes CRS: {nodes_check.crs}, Edges CRS: {edges_check.crs}") + + return G_proj + + +def download_and_prepare_osm_network(_study_area_config: dict) -> nx.MultiDiGraph: + """Download and 
prepare OSM network based on study area configuration.""" + print("=== Starting OSM Network Download and Preparation ===") + + # Apply OSMNX settings + for setting, value in _study_area_config["osmnx_settings"].items(): + setattr(ox.settings, setting, value) + print("✓ OSMNX settings applied") + + # List to store the graphs + graphs = [] + + # Set up study area parameters + study_area = _study_area_config['study_area'] + base_name = f"{_study_area_config['work_dir']}/geo/{study_area}" + census_year = _study_area_config["census_year"] + utm_epsg = _study_area_config["utm_epsg"] + state_fips_code = _study_area_config["state_fips"] + county_fips_codes = _study_area_config["county_fips"] + tolerance = _study_area_config["tolerance"] + + print(f"Collecting {study_area} boundaries...") + + # Process each layer defined in the configuration + for layer_name, layer_config in _study_area_config["graph_layers"].items(): + # Get layer configuration + geo_level = layer_config["geo_level"] + min_density = layer_config.get("min_density_per_km2", 0) + custom_filter = layer_config["custom_filter"] + buffer_in_meters = layer_config["buffer_zone_in_meters"] + + # Create the region boundary GeoDataFrame + region_boundary_wgs84 = collect_geographic_boundaries( + state_fips_code=state_fips_code, + county_fips_codes=county_fips_codes, + year=census_year, + study_area_boundary_geo_path=f"{base_name}_{geo_level}_{census_year}_wgs84.geojson", + geo_level=geo_level + ) + + # Process specific layer types + if layer_name == "main": + print(f"Processing {layer_name} layer") + graph_layer = to_convex_hull(region_boundary_wgs84, utm_epsg, buffer_in_meters) + network_type = "drive" + simplify = False + retain_all = True + truncate_by_edge = True + + elif layer_name == "residential": + density_info = f" with minimum density: {min_density} pop/km²" if min_density > 0 else "" + print(f"Processing {layer_name} layer{density_info}") + + # Get population data + pop_data = collect_census_data( + 
state_fips_code, + county_fips_codes, + census_year, + census_data_file=f"{base_name}_acs_census_{geo_level}_{census_year}.csv", + geo_level=geo_level + ) + + filtered_boundaries = filter_boundaries_by_density( + region_boundary_wgs84, + pop_data, + utm_epsg, + geo_level, + min_density, + density_geo_file=f"{base_name}_{geo_level}_{census_year}_{min_density}ppsk_wgs84.geojson", + ) + + graph_layer = shapely.ops.unary_union([ + to_convex_hull(geom, utm_epsg, buffer_in_meters) for geom in filtered_boundaries.geometry + ]) + + network_type = "drive" + simplify = False + retain_all = True + truncate_by_edge = True + + elif layer_name == "ferry": + print(f"Processing ferry layer to connect islands...") + graph_layer = to_convex_hull(region_boundary_wgs84, utm_epsg, buffer_in_meters) + network_type = "all" + simplify = True + retain_all = True + truncate_by_edge = False + + else: + raise ValueError(f"Invalid layer name: {layer_name}") + + print("✓ Boundaries collected and unified") + + # Download OSM Network + print(f"Downloading OSM network with filter: {custom_filter}") + g = ox.graph_from_polygon( + graph_layer, + network_type=network_type, + simplify=simplify, + retain_all=retain_all, + truncate_by_edge=truncate_by_edge, + custom_filter=custom_filter + ) + print(f"✓ Downloaded network with {g.number_of_nodes()} nodes and {g.number_of_edges()} edges") + + # Special processing for ferry network + if layer_name == "ferry": + g = process_ferry_edges(g) + if g.number_of_edges() > 0: + print(f"✓ Processed {g.number_of_edges()} ferry connections") + else: + print("✗ No suitable ferry connections found") + continue # Skip adding this empty graph + + # Add the graph to the list if it has edges + adjust_and_add_graph(graphs, g) + + # Combine all graphs + print("=== Processing Combined Network ===") + g_combined = nx.compose_all(graphs) + print(f"✓ Combined network has {g_combined.number_of_nodes()} nodes and {g_combined.number_of_edges()} edges") + + # Project to UTM for 
processing + print("Projecting graph to UTM...") + g_projected = project_graph(g_combined, to_crs=utm_epsg) + print("✓ Network projected to UTM") + + # Add edge speeds + print("Adding edge speeds...") + g_with_speeds = ox.add_edge_speeds(g_projected) + print("✓ Edge speeds added") + + # Process tags for vehicle types + print("Processing tags...") + g_processed_tags = process_tags(g_with_speeds, _study_area_config) + print("✓ Edge tags processed") + + # Consolidate intersections + print("Consolidating intersections...") + g_consolidated = ox.consolidate_intersections( + g_processed_tags, + tolerance=tolerance, + rebuild_graph=True, + dead_ends=True, + reconnect_edges=True + ) + print("✓ Intersections consolidated") + + # Simplify the graph + print("Simplifying graph...") + g_simplified = ox.simplification.simplify_graph( + g_consolidated, + edge_attrs_differ=["highway", "lanes", "maxspeed"], + remove_rings=False, + track_merged=True, + edge_attr_aggs={ + "length": sum, + "travel_time": sum, + "hgv": bool_all, + "mdv": bool_all, + "lanes": median_lanes, + "speed_kph": mean, + "maxspeed": mean_maxspeed, + "oneway": yes_no_all, + "access": yes_no_all, + "reversed": bool_all, + "maxweight": min + } + ) + print("✓ Network simplified") + + # Create unique edge IDs + nodes, edges = ox.graph_to_gdfs(g_simplified) + edges['edge_id'] = edges.apply( + lambda row: create_unique_edge_id(row['u_original'], row['v_original'], row['osmid'], row.get('key', None)), + axis=1 + ) + g_hashed = ox.graph_from_gdfs(nodes, edges) + print("✓ Unique edge IDs created") + + # Project back to WGS84 + print("Projecting to WGS84...") + g_wgs84 = project_graph(g_hashed, to_latlong=True) + print("✓ Projected to WGS84") + + # Find largest connected component + print("Finding largest connected component...") + g_connected = ox.truncate.largest_component(g_wgs84) + print(f"✓ Final network has {g_connected.number_of_nodes()} nodes and {g_connected.number_of_edges()} edges") + + print("=== Network 
Download and Preparation Complete ===") + return g_connected + + +def scan_network_directories_for_ways(directory): + import csv + import subprocess + import os + + def calculate_ways(osm_file): + try: + # Use osmium to get file info with summary + result = subprocess.run(['osmium', 'fileinfo', '-e', osm_file], + capture_output=True, text=True) + # Initialize ways_count variable + ways_count = 0 + + # Extract the number of ways from the output + for line in result.stdout.splitlines(): + if "Number of ways" in line: + ways_count = line.split(":")[1].strip() # Get the number of ways + break # Stop after finding the count + + return ways_count # Return the number of ways + except Exception as e: + print(f"Error processing {osm_file}: {e}") + return 0 + + output_file = os.path.join(directory, 'ways_count.csv') + scanned_files = set() + + # Check if output file exists and load already processed files + if os.path.exists(output_file): + try: + with open(output_file, 'r', newline='') as f: + reader = csv.reader(f) + next(reader, None) # Skip header, safely + for row in reader: + if len(row) >= 3: # Ensure the row has enough columns + scanned_files.add(row[2]) # Add scanned file path to the set + except Exception as e: + print(f"Error reading existing CSV: {e}") + else: + # Create the output file and write the header + with open(output_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['name', 'ways', 'path']) + print(f"Created output file: {output_file}") + + print(f"Scanning directory: {directory}") # Log current directory being scanned + for root, dirs, files in os.walk(directory): + # Skip archive directories + if 'archive' in root.lower(): + print(f"Ignoring archive directory: {root}") + continue + + # Look for the first osm.pbf file using next() with a generator expression + osm_file_path = next((os.path.join(root, file) for file in files if file.endswith('.osm.pbf')), None) + + if osm_file_path is not None: + if osm_file_path in scanned_files: 
+ print(f"PBF file already processed: {osm_file_path}") # Log already processed directory + continue + else: + # Extract network name from the file name or directory name + network_name = os.path.basename(root) # Use the directory name as the network name + number_of_ways = calculate_ways(osm_file_path) + + # Ensure file ends with newline before appending + """Ensure the file ends with a newline character.""" + if os.path.exists(output_file) and os.path.getsize(output_file) > 0: + with open(output_file, 'rb+') as f: + f.seek(-1, os.SEEK_END) # Go to the last byte + last_char = f.read(1) + if last_char != b'\n': + f.seek(0, os.SEEK_END) # Go to the end of the file + f.write(b'\n') # Add a newline if it doesn't end with one + + # Append result to the output CSV file + with open(output_file, 'a', newline='') as f: + writer = csv.writer(f) + writer.writerow([network_name, number_of_ways, osm_file_path]) # Write network name, number of ways, and path + print(f"Appended to CSV: {network_name}, {number_of_ways}, {osm_file_path}") # Log appended data + else: + print(f"No OSM file found in this directory: {root}.") # Log message if no file found + continue # Skip to the next directory if no file is found + +def check_invalid_coordinates(graph): + """ + Check for invalid coordinates in the graph nodes. 
+ + Parameters: + ----------- + graph : networkx.MultiDiGraph + The graph to check + + Returns: + -------- + tuple + (has_invalid, invalid_nodes) where: + - has_invalid: boolean indicating if any invalid coordinates were found + - invalid_nodes: list of node IDs with invalid coordinates + """ + nodes, _ = ox.graph_to_gdfs(graph) + + # Check for NaN, infinite, or out-of-range coordinates + invalid_x = ~nodes['x'].between(-180, 180) | nodes['x'].isna() | nodes['x'].abs().eq(float('inf')) + invalid_y = ~nodes['y'].between(-90, 90) | nodes['y'].isna() | nodes['y'].abs().eq(float('inf')) + + # Combine invalid x or y + invalid_nodes = nodes[invalid_x | invalid_y] + + if len(invalid_nodes) > 0: + print(f"\nWARNING: Found {len(invalid_nodes)} nodes with invalid coordinates:") + for idx, node in invalid_nodes.iterrows(): + print(f" Node ID: {idx}, x: {node['x']}, y: {node['y']}") + return True, invalid_nodes.index.tolist() + + return False, [] + + +class OSMTagHandler(osmium.SimpleHandler): + def __init__(self): + osmium.SimpleHandler.__init__(self) + # Create separate counters for different element types + self.way_tag_counters = defaultdict(Counter) + self.node_tag_counters = defaultdict(Counter) + self.relation_tag_counters = defaultdict(Counter) + self.other_tags_counters = defaultdict(Counter) + self.records = [] + self.unique_tags = set() + self.total_ways = 0 + self.total_nodes = 0 + self.total_relations = 0 + + def process_other_tags(self, other_tags_str): + """Parse hstore-formatted other_tags string into a dictionary""" + if not other_tags_str: + return {} + + parsed_tags = {} + try: + # Handle the format: "key"=>"value","key2"=>"value2",... 
def _summarize_tag_counters(tag_counters, element_total, num_top_values):
    """Turn {tag_key: Counter(value -> n)} into per-tag stats, most frequent tag first."""
    summary = {}
    for tag_key, counter in tag_counters.items():
        total = sum(counter.values())
        summary[tag_key] = {
            'count': total,
            'unique_values': len(counter),
            'top_values': dict(counter.most_common(num_top_values)),
            # Guard the division so a zero denominator yields 0 instead of raising.
            'percent_present': round(total / element_total * 100, 2) if element_total > 0 else 0,
        }
    # sorted() is stable (also with reverse=True), so equal counts keep first-seen order.
    return dict(sorted(summary.items(), key=lambda item: item[1]['count'], reverse=True))


def analyze_osm_pbf(file_path, num_top_values=10):
    """
    Analyze an OSM PBF file and return statistics about all tags,
    separated by element type (way, node, relation).

    Args:
        file_path: Path to the OSM PBF file
        num_top_values: Optional, number of most common values to report for each tag

    Returns:
        Tuple ``(stats, records_df)``:
        - stats: dict with 'total_ways', 'total_nodes', 'total_relations',
          'unique_tags' (list) and four per-tag summaries ('way_stats',
          'node_stats', 'relation_stats', 'other_tags_stats'). Each summary
          maps a tag key to 'count', 'unique_values', 'top_values' and
          'percent_present', ordered by descending count.
        - records_df: DataFrame built from the handler's records (empty
          DataFrame when there are none).
    """
    print(f"Analyzing OSM PBF file: {file_path}")
    handler = OSMTagHandler()

    # Process the file
    handler.apply_file(file_path)

    print(f"Processed {handler.total_ways} ways, {handler.total_nodes} nodes, and {handler.total_relations} relations")
    print(f"Found {len(handler.unique_tags)} unique tag keys")

    # other_tags percentages are expressed relative to every element read.
    total_elements = handler.total_ways + handler.total_nodes + handler.total_relations

    stats = {
        'total_ways': handler.total_ways,
        'total_nodes': handler.total_nodes,
        'total_relations': handler.total_relations,
        'unique_tags': list(handler.unique_tags),
        'way_stats': _summarize_tag_counters(
            handler.way_tag_counters, handler.total_ways, num_top_values),
        'node_stats': _summarize_tag_counters(
            handler.node_tag_counters, handler.total_nodes, num_top_values),
        'relation_stats': _summarize_tag_counters(
            handler.relation_tag_counters, handler.total_relations, num_top_values),
        'other_tags_stats': _summarize_tag_counters(
            handler.other_tags_counters, total_elements, num_top_values),
    }

    # Create a DataFrame from the records
    records_df = pd.DataFrame(handler.records) if handler.records else pd.DataFrame()

    return stats, records_df
def check_duplicate_edge_ids(edges_gdf, id_column='edge_id'):
    """
    Check for duplicate edge IDs in an OSMnx edges GeoDataFrame.

    Parameters:
    -----------
    edges_gdf : GeoDataFrame
        The edges GeoDataFrame from ox.graph_to_gdfs()
    id_column : str, default 'edge_id'
        The column name containing the edge IDs to check

    Returns:
    --------
    tuple
        (has_duplicates, duplicate_info) where:
        - has_duplicates: Boolean indicating if duplicates were found
        - duplicate_info: None when there are no duplicates; otherwise a
          2-tuple ``(counts_df, examples_df)``. ``counts_df`` has columns
          'edge_id' and 'count' (one row per duplicated ID, in value_counts
          order, whatever ``id_column`` is called). ``examples_df`` holds up
          to three rows of ``edges_gdf`` per duplicated ID, in the same ID
          order, keeping the original index.
    """
    # Count occurrences of each edge ID and keep only those seen more than once.
    id_counts = edges_gdf[id_column].value_counts()
    duplicates = id_counts[id_counts > 1]

    if len(duplicates) == 0:
        print(f"No duplicate edge IDs found in {len(edges_gdf)} edges")
        return False, None

    duplicate_counts = duplicates.reset_index()
    duplicate_counts.columns = ['edge_id', 'count']

    # Group the duplicated rows once instead of re-scanning the whole frame
    # for every duplicated ID; get_group keeps the rows' original order.
    duplicated_rows = edges_gdf[edges_gdf[id_column].isin(duplicates.index)]
    rows_by_id = duplicated_rows.groupby(id_column, sort=False)
    examples_df = pd.concat(
        [rows_by_id.get_group(dup_id).head(3) for dup_id in duplicate_counts['edge_id']]
    )

    print(f"Found {len(duplicates)} duplicate edge IDs out of {len(edges_gdf)} total edges")
    return True, (duplicate_counts, examples_df)
class _OSMContentHandler(xml.sax.handler.ContentHandler):
    """
    SAX handler that accumulates OSM XML into an Overpass-style dict.

    The result is exposed as ``self.object``: the root element's ``version``
    and ``generator`` attributes plus an ``"elements"`` list holding one dict
    per node, way and relation, in document order. For format notes, see
    https://wiki.openstreetmap.org/wiki/OSM_XML and https://overpass-api.de
    """

    def __init__(self):
        self.object = {"elements": []}
        # the node/way/relation currently being assembled
        self._element = None

    @staticmethod
    def _converted(attrs, wanted, caster):
        """Return the `wanted` attributes of `attrs`, each passed through `caster`."""
        return {key: caster(value) for key, value in attrs.items() if key in wanted}

    def startElement(self, name, attrs):
        integer_attrs = {"id", "uid", "version", "changeset"}

        if name == "tag":
            # child of the element currently being assembled
            self._element["tags"][attrs["k"]] = attrs["v"]

        elif name == "nd":
            self._element["nodes"].append(int(attrs["ref"]))

        elif name == "member":
            member = {}
            for key, value in attrs.items():
                member[key] = int(value) if key == "ref" else value
            self._element["members"].append(member)

        elif name in {"node", "way"}:
            # start from the raw string attributes, then overwrite the numeric ones
            self._element = dict(type=name, tags={}, nodes=[], **attrs)
            self._element.update(self._converted(attrs, {"lat", "lon"}, float))
            self._element.update(self._converted(attrs, integer_attrs, int))

        elif name == "relation":
            self._element = dict(type=name, tags={}, members=[], **attrs)
            self._element.update(self._converted(attrs, integer_attrs, int))

        elif name == "osm":
            self.object.update(self._converted(attrs, {"version", "generator"}, lambda value: value))

    def endElement(self, name):
        # a closing node/way/relation means the assembled element is complete
        if name in {"node", "way", "relation"}:
            self.object["elements"].append(self._element)
def save_graph_xml(
    data,
    filepath=None,
    edge_tags=None,
    edge_tag_aggs=None
):
    """
    Save a graph to disk as an OSM-formatted XML .osm file.

    Public entry point; all work is delegated to `_save_graph_xml`.

    Parameters
    ----------
    data : networkx.MultiDiGraph or tuple
        the input graph, or a `(gdf_nodes, gdf_edges)` pair
    filepath : string or pathlib.Path
        output path; if None, "graph.osm" inside `settings.data_folder`
    edge_tags : list
        edge columns to write as way tags; if None, defaults to
        ["highway", "lanes", "maxspeed", "name", "oneway"]
    edge_tag_aggs : list of length-2 string tuples
        optional `(tag, aggregation)` pairs applied across the edges merged
        into one way, e.g. `[("length", "sum")]`

    Returns
    -------
    None
    """
    _save_graph_xml(
        data,
        filepath,
        edge_tags,
        edge_tag_aggs
    )


def _save_graph_xml(  # noqa: C901
    data,
    filepath,
    edge_tags,
    edge_tag_aggs
):
    """
    Save graph to disk as an OSM-formatted UTF-8 encoded XML .osm file.

    Parameters
    ----------
    data : networkx.MultiDiGraph or tuple
        the input graph, or a `(gdf_nodes, gdf_edges)` pair. The node table
        needs `x`/`y` columns; an index level named `osmid` becomes the node id.
    filepath : string or pathlib.Path
        output path; if None, "graph.osm" inside `settings.data_folder`
    edge_tags : list
        edge columns to write as way tags; if None, defaults to
        ["highway", "lanes", "maxspeed", "name", "oneway"]
    edge_tag_aggs : list of length-2 string tuples
        optional `(tag, aggregation)` pairs applied across the edges merged
        into one way; None disables aggregation

    Returns
    -------
    None
    """
    if edge_tags is None:
        edge_tags = ["highway", "lanes", "maxspeed", "name", "oneway"]
    oneway = False
    merge_edges = True
    api_version = 0.6
    precision = 6
    edge_attrs = ["id", "timestamp", "uid", "user", "version", "changeset"]
    node_tags = ["highway"]
    node_attrs = [
        "id",
        "timestamp",
        "uid",
        "user",
        "version",
        "changeset",
        "lat",
        "lon",
    ]

    # default filepath if none was provided
    filepath = Path(settings.data_folder) / "graph.osm" if filepath is None else Path(filepath)

    # if save folder does not already exist, create it
    filepath.parent.mkdir(parents=True, exist_ok=True)

    if not settings.all_oneway:  # pragma: no cover
        warn(
            "For the `save_graph_xml` function to behave properly, the graph "
            "must have been created with `ox.settings.all_oneway=True`.",
            stacklevel=2,
        )

    # Decide by type rather than by attempting to unpack: iterating a graph
    # yields its nodes, so a graph with exactly two nodes would "unpack"
    # into two node ids instead of being converted.
    if isinstance(data, nx.Graph):
        gdf_nodes, gdf_edges = ox.convert.graph_to_gdfs(
            data, node_geometry=False, fill_edge_geometry=False
        )
    else:
        gdf_nodes, gdf_edges = data

    # rename columns per osm specification
    gdf_nodes = gdf_nodes.rename(columns={"x": "lon", "y": "lat"})
    gdf_nodes["lon"] = gdf_nodes["lon"].round(precision)
    gdf_nodes["lat"] = gdf_nodes["lat"].round(precision)
    gdf_nodes = gdf_nodes.reset_index().rename(columns={"osmid": "id"})

    # any pre-existing edge "id" column is discarded; the way id comes from
    # "uniqueid" when present, otherwise from a positional row number
    if "id" in gdf_edges.columns:
        gdf_edges = gdf_edges[[col for col in gdf_edges if col != "id"]]
    if "uniqueid" in gdf_edges.columns:
        gdf_edges = gdf_edges.rename(columns={"uniqueid": "id"})
    else:
        gdf_edges = gdf_edges.reset_index().reset_index().rename(columns={"index": "id"})

    # add default values for required attributes
    for table in (gdf_nodes, gdf_edges):
        table["uid"] = "1"
        table["user"] = "OSMnx"
        table["version"] = "1"
        table["changeset"] = "1"
        table["timestamp"] = ox.utils.ts(template="{:%Y-%m-%dT%H:%M:%SZ}")

    # misc. string replacements to meet OSM XML spec
    if "oneway" in gdf_edges.columns:
        # fill blank oneway tags with default (False)
        gdf_edges.loc[pd.isna(gdf_edges["oneway"]), "oneway"] = oneway
        gdf_edges.loc[:, "oneway"] = gdf_edges["oneway"].astype(str)
        # `.str.replace` substitutes the substring "False"; the chained
        # Series `.replace` then swaps cells that are exactly "True"
        gdf_edges.loc[:, "oneway"] = (
            gdf_edges["oneway"].str.replace("False", "no").replace("True", "yes")
        )

    # initialize XML tree with an OSM root element then append nodes/edges
    root = ET.Element(
        "osm", attrib={"version": str(api_version), "generator": f"OSMnx {ox.__version__}"}
    )
    root = _append_nodes_xml_tree(root, gdf_nodes, node_attrs, node_tags)
    root = _append_edges_xml_tree(
        root, gdf_edges, edge_attrs, edge_tags, edge_tag_aggs, merge_edges
    )

    # write to disk
    ET.ElementTree(root).write(filepath, encoding="utf-8", xml_declaration=True)
    ox.utils.log(f"Saved graph as .osm file at {filepath!r}")


def _append_nodes_xml_tree(root, gdf_nodes, node_attrs, node_tags):
    """
    Append nodes to an XML tree.

    Parameters
    ----------
    root : ElementTree.Element
        xml tree
    gdf_nodes : geopandas.GeoDataFrame
        GeoDataFrame of graph nodes
    node_attrs : list
        osm node attributes to include in output OSM XML; every one of them
        must be non-null in each row, otherwise the lookup raises KeyError
    node_tags : list
        osm node tags to include in output OSM XML; a tag is skipped for rows
        where its value is missing

    Returns
    -------
    root : ElementTree.Element
        xml tree with nodes appended
    """
    for _, row in gdf_nodes.iterrows():
        # drop nulls first so missing tags are simply absent from the row
        row_str = row.dropna().astype(str)
        node = ET.SubElement(root, "node", attrib=row_str[node_attrs].to_dict())

        for tag in node_tags:
            if tag in row_str:
                ET.SubElement(node, "tag", attrib={"k": tag, "v": row_str[tag]})
    return root
def _append_merged_edge_attrs(xml_edge, sample_edge, all_edges_df, edge_tags, edge_tag_aggs):
    """
    Write the tag children of one merged way.

    Parameters
    ----------
    xml_edge : ElementTree.SubElement
        XML representation of an output graph edge
    sample_edge: pandas.Series
        one (stringified) row of the way, used for non-aggregated tags
    all_edges_df: pandas.DataFrame
        a dataframe with one row for each edge in an OSM way
    edge_tags : list
        osm way tags to include in output OSM XML
    edge_tag_aggs : list of length-2 string tuples
        `(tag, aggregation)` pairs, or None. A tag listed here is not copied
        from `sample_edge`; instead the aggregation is applied to that column
        of `all_edges_df`, e.g. `[('length', 'sum')]` writes the summed
        length of the component edges. Without it, the tag simply reflects
        the sample edge.
    """
    agg_pairs = () if edge_tag_aggs is None else edge_tag_aggs
    aggregated_names = tuple(name for name, _ in agg_pairs)

    # plain tags: copied straight from the sample edge
    for name in edge_tags:
        if name in sample_edge and name not in aggregated_names:
            ET.SubElement(xml_edge, "tag", attrib={"k": name, "v": sample_edge[name]})

    # aggregated tags: computed over every edge of the way
    for name, how in agg_pairs:
        if name not in all_edges_df.columns:
            continue
        combined = all_edges_df[name].aggregate(how)
        ET.SubElement(xml_edge, "tag", attrib={"k": name, "v": str(combined)})


def _append_nodes_as_edge_attrs(xml_edge, sample_edge, all_edges_df):
    """
    Write the ordered `nd` children (node references) of one way.

    Parameters
    ----------
    xml_edge : ElementTree.SubElement
        XML representation of an output graph edge
    sample_edge: pandas.Series
        one (stringified) row of the way
    all_edges_df: pandas.DataFrame
        a dataframe with one row for each edge in an OSM way
    """
    if len(all_edges_df) == 1:
        # a single edge needs no ordering: just its two endpoints
        for endpoint in ("u", "v"):
            ET.SubElement(xml_edge, "nd", attrib={"ref": sample_edge[endpoint]})
        return

    way_edges = all_edges_df.reset_index()
    try:
        node_sequence = _get_unique_nodes_ordered_from_way(way_edges)
    except nx.NetworkXUnfeasible:
        # ordering failed (networkx raises this for cyclic graphs): order the
        # way without its first edge and put that edge's origin at the front
        head = way_edges.iloc[0]["u"]
        node_sequence = [head] + _get_unique_nodes_ordered_from_way(way_edges.iloc[1:])

    for node_id in node_sequence:
        ET.SubElement(xml_edge, "nd", attrib={"ref": str(node_id)})


def _append_edges_xml_tree(root, gdf_edges, edge_attrs, edge_tags, edge_tag_aggs, merge_edges):
    """
    Append edges to an XML tree as `way` elements.

    Parameters
    ----------
    root : ElementTree.Element
        xml tree
    gdf_edges : geopandas.GeoDataFrame
        GeoDataFrame of graph edges
    edge_attrs : list
        osm way attributes to include in output OSM XML
    edge_tags : list
        osm way tags to include in output OSM XML
    edge_tag_aggs : list of length-2 string tuples
        only used when `merge_edges` is True: `(tag, aggregation)` pairs so
        that a merged way's tag reflects all of its component edges, e.g.
        `[('length', 'sum')]`. Otherwise the tag comes from the first edge
        of the way.
    merge_edges : bool
        if True, edges sharing an `id` are written as a single way;
        otherwise every edge is written as its own way

    Returns
    -------
    root : ElementTree.Element
        XML tree with edges appended
    """
    edge_table = gdf_edges.reset_index()

    if not merge_edges:
        _create_way_for_each_edge(
            root=root,
            gdf_edges=edge_table,
            edge_attrs=edge_attrs,
            edge_tags=edge_tags,
        )
        return root

    for _, way_rows in edge_table.groupby("id"):
        # the first row of the group supplies the way's attributes and plain tags
        representative = way_rows.iloc[0].dropna().astype(str)
        way_attrib = representative[edge_attrs].dropna().to_dict()
        xml_way = ET.SubElement(root, "way", attrib=way_attrib)
        _append_nodes_as_edge_attrs(
            xml_edge=xml_way, sample_edge=representative, all_edges_df=way_rows
        )
        _append_merged_edge_attrs(
            xml_edge=xml_way,
            sample_edge=representative,
            edge_tags=edge_tags,
            edge_tag_aggs=edge_tag_aggs,
            all_edges_df=way_rows,
        )

    return root


def _get_unique_nodes_ordered_from_way(df_way_edges):
    """
    Recover original node order from edges associated with a single OSM way.

    Parameters
    ----------
    df_way_edges : pandas.DataFrame
        Dataframe containing columns 'u' and 'v' corresponding to
        origin/destination nodes.

    Returns
    -------
    unique_ordered_nodes : list
        Topologically sorted node IDs of the largest weakly connected
        component of the way's edges. If the edges do not all connect
        (e.g. [(1, 2), (2, 3), (10, 11), (11, 12), (12, 13)]), nodes outside
        that component are left out, because the original order of
        unconnected pieces cannot be recovered; a log message reports how
        many nodes were ordered.
    """
    origins = list(df_way_edges["u"].to_numpy())
    destinations = list(df_way_edges["v"].to_numpy())
    endpoints = origins + destinations

    way_graph = nx.MultiDiGraph()
    way_graph.add_nodes_from(endpoints)
    way_graph.add_edges_from(df_way_edges[["u", "v"]].to_numpy())

    # keep only the largest weakly connected piece before sorting
    main_component = ox.truncate.largest_component(way_graph, strongly=False)
    ordered = list(nx.topological_sort(main_component))

    expected = len(np.unique(endpoints))
    recovered = len(ordered)
    if recovered < expected:
        ox.utils.log(f"Recovered order for {recovered} of {expected} nodes")

    return ordered
os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +from python.utils.study_area_config import get_area_config +from python.utils.study_area_config import generate_network_name # In Mac, you might need to cd to folder '/Applications/Python {version}' # and run ./Install\ Certificates.command # # To prepare data for a new study area, make sure to change the following configuration variables # Example of preparing SFBay data (9 counties + Santa Cruz and Yolo) -census_year = 2018 -state_fips = '06' study_area = "sfbay" -study_area_crs = 26910 -study_area_fips = ['001', '013', '041', '055', '075', '081', '085', '095', '097', '087', '113'] - -"beam/runs/calibration-jdeqsim" -study_area_dir = os.path.expanduser("~/Workspace/Data/Simulation") + "/" + study_area -study_area_taz_geo = study_area_dir + "/geo/shp/sfbay-tazs-epsg-26910.shp" -study_area_taz_id = "taz1454" -study_area_beam_network_csv = study_area_dir + '/validation/beam/sfbay_residential_psimpl_network.csv.gz' -npmrds_data_label = "NPMRDS_2018" -npmrds_raw_geo = study_area_dir + "/validation/npmrds/California.shp" -npmrds_raw_data_csv = study_area_dir + '/validation/npmrds/al_ca_oct2018_1hr_trucks_pax.csv' +batch = "20240123" +scenario = "2018-Baseline-FC12-Bis2" +config = get_area_config(study_area) +config["network"]["graph_layers"]["residential"]["min_density_per_km2"] = 5500 + +config_network = config["network"] +config_npmrds = config_network["validation"]["npmrds"] +config_geo = config["geo"] + +network_name = generate_network_name(config) +network_dir = f'{config["work_dir"]}/network/{network_name}' +osm_pbf_path = os.path.expanduser(f"{network_dir}/{network_name}.osm.pbf") + +output_dir = f"{config["work_dir"]}/beam-runs/{batch}/{scenario}" + +# census_year = 2018 +# state_fips = '06' +# study_area_crs = 26910 +# study_area_fips = ['001', '013', '041', '055', '075', '081', '085', '095', '097', '087', '113'] + # # The 
following need to be set/added manually # census_year = 2018 @@ -35,37 +52,34 @@ # npmrds_raw_data_csv = study_area_dir + '/validation_data/NPMRDS/vt_wi_2018_1hr.csv' # # -# -# -# -# -# The following will be generated automatically -study_area_county_geo = study_area_dir + "/geo/" + study_area + "_counties.geojson" -study_area_cbg_geo = study_area_dir + "/geo/" + study_area + "_cbgs.geojson" -study_area_cbg_id = "GEOID" -# study_area_taz_geo = study_area_dir + "/geo/" + study_area + "_tazs.geojson" -# study_area_taz_id = "TAZCE10" -study_area_cbg_taz_map_csv = study_area_dir + "/geo/" + study_area + "_cbg_taz_map.csv" -# -npmrds_station_geo = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_station.geojson" -npmrds_data_csv = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_data.csv" -npmrds_hourly_speed_csv = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_hourly_speeds.csv" -npmrds_hourly_speed_by_road_class_csv = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_hourly_speed_by_road_class.csv" -# -first_dot_index = study_area_beam_network_csv.find('.') -beam_network_prefix = study_area_beam_network_csv[:first_dot_index] if first_dot_index != -1 else study_area_beam_network_csv -beam_network_car_links_geo = beam_network_prefix + '_car_only.geojson' -beam_network_mapped_to_npmrds_geo = beam_network_prefix + '_mapped_to_npmrds.geojson' +# # The following will be generated automatically +# study_area_county_geo = study_area_dir + "/geo/" + study_area + "_counties.geojson" +# study_area_cbg_geo = study_area_dir + "/geo/" + study_area + "_cbgs.geojson" +# # study_area_taz_geo = study_area_dir + "/geo/" + study_area + "_tazs.geojson" +# # study_area_taz_id = "TAZCE10" +# study_area_cbg_taz_map_csv = study_area_dir + "/geo/" + study_area + "_cbg_taz_map.csv" +# # +# npmrds_station_geo = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_station.geojson" +# npmrds_data_csv = study_area_dir + 
'/validation/npmrds/' + study_area + "_npmrds_data.csv" +# npmrds_hourly_speed_csv = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_hourly_speeds.csv" +# npmrds_hourly_speed_by_road_class_csv = study_area_dir + '/validation/npmrds/' + study_area + "_npmrds_hourly_speed_by_road_class.csv" +# # +# first_dot_index = study_area_beam_network_csv.find('.') +# beam_network_prefix = study_area_beam_network_csv[ +# :first_dot_index] if first_dot_index != -1 else study_area_beam_network_csv +# beam_network_car_links_geo = beam_network_prefix + '_car_only.geojson' +# beam_network_mapped_to_npmrds_geo = beam_network_prefix + '_mapped_to_npmrds.geojson' st = time.time() -if os.path.exists(study_area_county_geo): - print("Loading county boundaries...") - region_boundary_wgs84 = gpd.read_file(study_area_county_geo) -else: - print("Downloading county boundaries...") - region_boundary_wgs84 = collect_geographic_boundaries(state_fips, study_area_fips, census_year, study_area_county_geo, - study_area_crs, geo_level='county') +region_boundary_wgs84 = collect_geographic_boundaries( + config["state_fips"], + config["county_fips"], + config["census_year"], + study_area, + geo_level='county', + work_dir=f'{config["work_dir"]}/geo' + ) # sf_cbg_geo = study_area_dir + "/zones/sf_cbgs.geojson" # if os.path.exists(sf_cbg_geo): @@ -76,45 +90,42 @@ # region_boundary_wgs84 = collect_geographic_boundaries(state_fips, ['075'], cbg_year, sf_cbg_geo, # projected_coordinate_system, geo_level='cbg') -if os.path.exists(study_area_cbg_geo): - print("Loading block groups boundaries...") - cbg_boundary_wgs84 = gpd.read_file(study_area_cbg_geo) -else: - print("Downloading block groups boundaries...") - cbg_boundary_wgs84 = collect_geographic_boundaries(state_fips, study_area_fips, census_year, study_area_cbg_geo, - study_area_crs, geo_level='cbg') - -if os.path.exists(study_area_taz_geo): - print("Loading taz boundaries...") - taz_boundary_wgs84 = 
gpd.read_file(study_area_taz_geo).to_crs(epsg=4326) -else: - print("Downloading taz boundaries...") - taz_boundary_wgs84 = None - # TODO We need to fix the source of the TAZ data - # taz_boundary_wgs84 = collect_geographic_boundaries(state_fips, study_area_fips, 2011, study_area_taz_geo, - # projected_coordinate_system, geo_level='taz') - -if taz_boundary_wgs84 is not None: - print("Mapping block groups to taz boundaries.") - map_cbg_to_taz(cbg_boundary_wgs84, study_area_cbg_id, taz_boundary_wgs84, study_area_taz_id, study_area_crs, study_area_cbg_taz_map_csv) +cbg_boundary_wgs84 = collect_geographic_boundaries( + config["state_fips"], + config["county_fips"], + config["census_year"], + study_area, + geo_level='county', + work_dir=f'{config["work_dir"]}/geo' +) + +# taz_boundary_wgs84 = gpd.read_file(f"{config['work_dir']}/{config_geo["taz_shp"]}").to_crs(epsg=4326) +# map_cbg_to_taz( +# cbg_boundary_wgs84, +# config_geo["cbg_id"], +# taz_boundary_wgs84, +# config_geo["taz_id"], +# config_geo["utm_epsg"], +# study_area_cbg_taz_map_csv +# ) regional_npmrds_station, _, beam_npmrds_network_map, _ = prepare_npmrds_data( # input - npmrds_label=npmrds_data_label, - npmrds_raw_geo=npmrds_raw_geo, - npmrds_raw_data_csv=npmrds_raw_data_csv, + npmrds_label=f"NPMRDS_{config_npmrds["year"]}", + npmrds_raw_geo=f"{config['work_dir']}/{config_npmrds["geo"]}", + npmrds_raw_data_csv=f'{config['work_dir']}/{config_npmrds["data"]}', npmrds_observed_speed_weight=0.5, region_boundary=region_boundary_wgs84, - beam_network_csv_input=study_area_beam_network_csv, - projected_crs_epsg=study_area_crs, + beam_network_csv_input=f"{network_dir}/network.csv.gz", + projected_crs_epsg=config_geo["utm_epsg"], distance_buffer_m=20, # output - npmrds_station_geo=npmrds_station_geo, - npmrds_data_csv=npmrds_data_csv, - npmrds_hourly_speed_csv=npmrds_hourly_speed_csv, - npmrds_hourly_speed_by_road_class_csv=npmrds_hourly_speed_by_road_class_csv, - beam_network_car_links_geo=beam_network_car_links_geo, 
- beam_npmrds_network_map_geo=beam_network_mapped_to_npmrds_geo) + npmrds_station_geo=f"{output_dir}/{study_area}_npmrds_station.geojson", + npmrds_data_csv=f"{output_dir}/{study_area}_npmrds_data.csv", + npmrds_hourly_speed_csv=f"{output_dir}/{study_area}_npmrds_hourly_speeds.csv", + npmrds_hourly_speed_by_road_class_csv=f"{output_dir}/{study_area}_npmrds_hourly_speed_by_road_class.csv", + beam_network_car_links_geo=f"{output_dir}/{study_area}_network_car_only.geojson", + beam_npmrds_network_map_geo=f"{output_dir}/{study_area}_network_mapped_to_npmrds.geojson") # ########## Checking Network print("Plotting region boundaries and stations") @@ -123,7 +134,7 @@ region_boundary_wgs84.boundary.plot(ax=ax, color='black') regional_npmrds_station.plot(ax=ax, color='blue') plt.title("Region Boundaries and NPMRDS Stations") -fig.savefig(os.path.splitext(npmrds_station_geo)[0] + ".png", dpi=300) # Adjust dpi for resolution +fig.savefig(f"{output_dir}/{study_area}_npmrds_station.png", dpi=300) # Adjust dpi for resolution plt.show(block=False) print("Plotting BEAM Network and NPMRDS stations") @@ -132,7 +143,7 @@ regional_npmrds_station.plot(ax=ax, color='blue', linewidth=2, label='NPMRDS') beam_npmrds_network_map.plot(ax=ax, color='red', linewidth=0.5, label='BEAM') plt.title("BEAM Network and NPMRDS Stations") -fig.savefig(os.path.splitext(beam_network_mapped_to_npmrds_geo)[0] + ".png", dpi=300) # Adjust dpi for resolution +fig.savefig(f"{output_dir}/{study_area}_network_mapped_to_npmrds.png", dpi=300) # Adjust dpi for resolution plt.show(block=False) -print(f"Execution time of prepare_npmrds_data: {(time.time() - st) / 60.0}min") +print(f"Execution time of prepare_npmrds_data: {((time.time() - st) / 60.0):.2f}min") \ No newline at end of file diff --git a/src/main/python/network_validation/process_isrm_network_intersection.py b/src/main/python/network_validation/process_isrm_network_intersection.py new file mode 100644 index 00000000000..e281f038a10 --- /dev/null +++ 
b/src/main/python/network_validation/process_isrm_network_intersection.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Script to map ISRM grid polygons to OSM edge geometries. +The result splits each OSM edge by ISRM polygon and calculates the proportion +of the edge length in each polygon, starting from the ISRM grid. +All operations are performed in UTM projection and results are converted back to WGS84. +""" + +import logging +import os +import re +import sys + +import geopandas as gpd +import pandas as pd +from tqdm import tqdm + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +# Now use absolute import +from python.utils.study_area_config import get_area_config +from python.utils.study_area_config import generate_network_name + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Define WGS84 EPSG code +WGS84_EPSG = 4326 + + +def parse_other_tags(other_tags): + """Parse the 'other_tags' column from OSM PBF file to extract key-value pairs.""" + if not other_tags or pd.isna(other_tags): + return {} + + # Extract key-value pairs using regex + pattern = r'"([^"]+)"=>"([^"]+)"' + matches = re.findall(pattern, other_tags) + return {key: value for key, value in matches} + + +def extract_edge_length(tags_dict): + """Extract the edge length from the tags dictionary.""" + length_str = tags_dict.get('length', None) + if length_str is None: + return None + try: + return float(length_str) + except (ValueError, TypeError): + return None + + +def process_isrm_osm_intersection(isrm_grid_path, osm_geojson_path, osm_gpkg_path, epsg_utm, output_path): + """ + Process the intersection of ISRM grid polygons with OSM edge geometries. 
+ All operations are performed in UTM projection and results are converted back to WGS84. + + Args: + isrm_grid_path (str): Path to ISRM grid shapefile with isrm column + osm_geojson_path (str): Path to OSM GEOJSON file with osm_id and other_tags + osm_gpkg_path (str): Path to OSM GPKG network with edge_id and geometry + epsg_utm (int): EPSG code for UTM projection to use for geometric operations + output_path (str): Path to output file + + Returns: + gpd.GeoDataFrame: The resulting GeoDataFrame with intersection results + """ + # 1. Load ISRM grid + logger.info(f"Loading ISRM grid from {isrm_grid_path}") + try: + isrm_gdf = gpd.read_file(isrm_grid_path) + if 'isrm' not in isrm_gdf.columns: + logger.error("ISRM grid file is missing 'isrm' column") + sys.exit(1) + except Exception as e: + logger.error(f"Failed to load ISRM grid: {e}") + sys.exit(1) + + # 2. Load OSM GPKG network + logger.info(f"Loading OSM GPKG network from {osm_gpkg_path}") + try: + gpkg_gdf = gpd.read_file(osm_gpkg_path, layer='edges') + if 'edge_id' not in gpkg_gdf.columns: + logger.error("OSM GPKG file is missing 'edge_id' column") + sys.exit(1) + except Exception as e: + logger.error(f"Failed to load OSM GPKG: {e}") + sys.exit(1) + + # 3. 
Load OSM GeoJSON + logger.info(f"Loading OSM GEOJSON from {osm_geojson_path}") + try: + osm_gdf = gpd.read_file(osm_geojson_path) + if 'osm_id' not in osm_gdf.columns or 'other_tags' not in osm_gdf.columns: + logger.error("OSM file is missing 'osm_id' or 'other_tags' columns") + sys.exit(1) + except Exception as e: + logger.error(f"Failed to load OSM GeoJSON: {e}") + sys.exit(1) + + # Convert osm_id to int and parse other_tags + osm_gdf['osm_id'] = osm_gdf['osm_id'].astype(int) + + # Parse other_tags to extract edge_id and length + osm_gdf['parsed_tags'] = osm_gdf['other_tags'].apply(parse_other_tags) + osm_gdf['edge_id'] = osm_gdf['parsed_tags'].apply(lambda x: x.get('edge_id', None)) + osm_gdf['edge_length'] = osm_gdf['parsed_tags'].apply(extract_edge_length) + + # Filter out edges without length information + valid_osm_gdf = osm_gdf.dropna(subset=['edge_length']) + logger.info(f"Found {len(valid_osm_gdf)} edges with valid length information") + + # Connect OSM data to geometries + edge_geom_map = pd.merge( + valid_osm_gdf[['osm_id', 'edge_id', 'edge_length']], + gpkg_gdf[['edge_id', 'geometry']], + on='edge_id', + how='inner' + ) + + # Convert to GeoDataFrame + edge_geom_gdf = gpd.GeoDataFrame(edge_geom_map, geometry='geometry', crs=gpkg_gdf.crs) + logger.info(f"Successfully mapped {len(edge_geom_gdf)} edges to OSM geometries") + + # Project all geometries to UTM for accurate calculations + logger.info(f"Projecting geometries to UTM EPSG:{epsg_utm}") + edge_geom_gdf = edge_geom_gdf.to_crs(epsg=epsg_utm) + isrm_gdf = isrm_gdf.to_crs(epsg=epsg_utm) + + # Create a spatial index for OSM geometries to speed up intersection queries + edge_geom_sindex = edge_geom_gdf.sindex + + # Process ISRM polygons and find intersections + intersection_results = [] + + logger.info("Finding intersections between ISRM polygons and OSM edges") + for idx, isrm_row in tqdm(isrm_gdf.iterrows(), total=len(isrm_gdf), desc="Processing ISRM polygons"): + isrm_id = isrm_row['isrm'] + isrm_geom 
= isrm_row.geometry + + # Find potential edge geometries that intersect this ISRM polygon + possible_matches_idx = list(edge_geom_sindex.intersection(isrm_geom.bounds)) + if not possible_matches_idx: + continue + + possible_matches = edge_geom_gdf.iloc[possible_matches_idx] + + # Further filter to only those that actually intersect + intersecting_edges = possible_matches[possible_matches.geometry.intersects(isrm_geom)] + + if len(intersecting_edges) == 0: + continue + + # For each intersecting edge, calculate intersection + for edge_idx, edge_row in intersecting_edges.iterrows(): + osm_id = edge_row['osm_id'] + edge_geom = edge_row.geometry + original_length = edge_row['edge_length'] + + # Get the actual edge length from geometry for proportion calculation + edge_geom_length = edge_geom.length + + # Get the actual intersection geometry + intersection_geom = edge_geom.intersection(isrm_geom) + + # Skip empty geometries + if intersection_geom.is_empty: + continue + + # Calculate the proportion of the edge length in this ISRM polygon + intersection_length = intersection_geom.length + proportion = round(intersection_length / edge_geom_length, 2) if edge_geom_length > 0 else 0 + proportional_length = original_length * proportion + + # Create a record for this intersection + result = { + 'isrm_id': isrm_id, + 'osm_id': osm_id, + 'edge_id': edge_row['edge_id'], + 'original_edge_length': original_length, + 'proportion': proportion, + 'proportional_length': proportional_length, + 'isrm_osm_id': f"{isrm_id}-{osm_id}", + 'geometry': intersection_geom + } + + # Copy all attributes from edge + for key, value in edge_row.items(): + if key not in ['geometry', 'osm_id', 'edge_length', 'edge_id'] and key not in result: + result[f'edge_{key}'] = value + + # Copy all attributes from ISRM polygon + for key, value in isrm_row.items(): + if key not in ['geometry', 'isrm'] and key not in result: + result[f'isrm_{key}'] = value + + intersection_results.append(result) + + 
logger.info(f"Intersection produced {len(intersection_results)} results") + + # Create a GeoDataFrame from results + if not intersection_results: + logger.error("No intersections found") + sys.exit(1) + + result_gdf = gpd.GeoDataFrame(intersection_results, geometry='geometry', crs=epsg_utm) + + # Convert results back to WGS84 for output + logger.info(f"Converting results back to WGS84 (EPSG:{WGS84_EPSG})") + result_gdf = result_gdf.to_crs(epsg=WGS84_EPSG) + + # Save results + logger.info(f"Saving results to {output_path}") + output_dir = os.path.dirname(output_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Determine output format based on file extension + extension = os.path.splitext(output_path)[1].lower() + if extension == '.gpkg': + result_gdf.to_file(output_path, driver='GPKG') + elif extension == '.shp': + result_gdf.to_file(output_path) + elif extension == '.geojson': + result_gdf.to_file(output_path, driver='GeoJSON') + elif extension == '.csv': + # For CSV, we need to export geometry as WKT + result_gdf['geometry_wkt'] = result_gdf.geometry.apply(lambda geom: geom.wkt) + result_df = pd.DataFrame(result_gdf.drop(columns='geometry')) + result_df.to_csv(output_path, index=False) + else: + logger.info(f"Unrecognized output format: {extension}, using GPKG format") + result_gdf.to_file(output_path, driver='GPKG') + + logger.info("Processing complete") + return result_gdf + + +def map_beam_network_to_isrm_osm_intersection(network_path, isrm_osm_path, output_path): + """ + Map network data to ISRM-OSM intersection data using attributeOrigId to match osm_id. + + Args: + network_path (str): Path to the network.csv.gz file + isrm_osm_path (str): Path to the ISRM-OSM intersection GeoJSON file + output_path (str): Path to save the output file + + Returns: + pd.DataFrame: The resulting DataFrame with mapping results + """ + # 1. 
Load network data + logger.info(f"Loading network data from {network_path}") + try: + network_df = pd.read_csv(network_path) + logger.info(f"Loaded network data with {len(network_df)} rows") + + # Check if attributeOrigId column exists + if 'attributeOrigId' not in network_df.columns: + logger.error("Network file is missing 'attributeOrigId' column") + return None + except Exception as e: + logger.error(f"Failed to load network data: {e}") + return None + + # Filter out rows with empty or NaN attributeOrigId + network_df = network_df.dropna(subset=['attributeOrigId']) + # 2. Load ISRM-OSM intersection data + logger.info(f"Loading ISRM-OSM intersection data from {isrm_osm_path}") + try: + isrm_osm_gdf = gpd.read_file(isrm_osm_path) + logger.info(f"Loaded ISRM-OSM data with {len(isrm_osm_gdf)} rows") + + # Check if osm_id column exists + if 'osm_id' not in isrm_osm_gdf.columns: + logger.error("ISRM-OSM file is missing 'osm_id' column") + return None + except Exception as e: + logger.error(f"Failed to load ISRM-OSM data: {e}") + return None + + # 3. Convert osm_id to the same type as attributeOrigId for proper joining + logger.info("Preparing data for mapping") + + # Make sure both ID columns are of the same type + network_df['attributeOrigId'] = network_df['attributeOrigId'].astype(int) + isrm_osm_gdf['osm_id'] = isrm_osm_gdf['osm_id'].astype(int) + + # 4. Merge the network and ISRM-OSM data + logger.info("Merging network data with ISRM-OSM data") + merged_df = pd.merge( + network_df, + isrm_osm_gdf, + left_on='attributeOrigId', + right_on='osm_id', + how='inner' + ) + + logger.info(f"Merged result has {len(merged_df)} rows") + + # 5. Calculate the proportional network values based on the ISRM-OSM proportion + logger.info("Calculating proportional values") + + # Apply proportion to network length + merged_df['proportional_network_length'] = merged_df['linkLength'] * merged_df['proportion'] + + # 6. 
Create a unique identifier combining network linkId and ISRM id + merged_df['network_isrm_id'] = merged_df['linkId'].astype(str) + '-' + merged_df['isrm_id'].astype(str) + + # 7. Save the result + logger.info(f"Saving mapped results to {output_path}") + output_dir = os.path.dirname(output_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Determine output format based on file extension + extension = os.path.splitext(output_path)[1].lower() + if extension == '.gpkg': + if isinstance(merged_df, gpd.GeoDataFrame): + merged_df.to_file(output_path, driver='GPKG') + else: + # Convert to GeoDataFrame if it's just a DataFrame + logger.warning("Converting DataFrame to GeoDataFrame for GPKG output") + geo_merged_df = gpd.GeoDataFrame(merged_df, geometry='geometry') + geo_merged_df.to_file(output_path, driver='GPKG') + elif extension == '.geojson': + if isinstance(merged_df, gpd.GeoDataFrame): + merged_df.to_file(output_path, driver='GeoJSON') + else: + # Convert to GeoDataFrame if it's just a DataFrame + logger.warning("Converting DataFrame to GeoDataFrame for GeoJSON output") + geo_merged_df = gpd.GeoDataFrame(merged_df, geometry='geometry') + geo_merged_df.to_file(output_path, driver='GeoJSON') + elif extension == '.csv': + # For CSV, we drop the geometry column if it exists + if 'geometry' in merged_df.columns: + # Save the WKT representation of geometry + merged_df['geometry_wkt'] = merged_df['geometry'].apply(lambda geom: geom.wkt if geom else None) + merged_df = merged_df.drop(columns='geometry') + merged_df.to_csv(output_path, index=False) + else: + logger.warning(f"Unrecognized output format: {extension}, using CSV format") + merged_df.to_csv(output_path, index=False) + + logger.info("Mapping complete") + return merged_df + + +def main(): + """Main execution function with hardcoded paths.""" + area = "seattle" + study_area_config = get_area_config(area) + study_area_config["graph_layers"]["residential"]["min_density_per_km2"] = 
412 + + network_name = generate_network_name(study_area_config) + work_dir = study_area_config["work_dir"] + network_dir = f'{work_dir}/network/{network_name}' + + # Input/output paths + isrm_grid_path = os.path.expanduser(f"{work_dir}/inmap/ISRM/isrm_polygon.shp") + osm_geojson_path = os.path.expanduser(f"{network_dir}/{network_name}.osm.geojson") + osm_gpkg_path = os.path.expanduser(f"{network_dir}/{network_name}.gpkg") + isrm_osm_dir = os.path.expanduser(f"{work_dir}/inmap/isrm-{network_name}") + os.makedirs(isrm_osm_dir, exist_ok=True) + isrm_osm_geojson_path = os.path.expanduser(f"{isrm_osm_dir}/isrm-{network_name}.geojson") + beam_network = os.path.expanduser(f"{network_dir}/network.csv.gz") + output2_path = os.path.expanduser(f"{isrm_osm_dir}/isrm-beam--network-intersection.geojson") + + if not os.path.exists(isrm_osm_geojson_path): + # Process the intersection + process_isrm_osm_intersection( + isrm_grid_path=isrm_grid_path, + osm_geojson_path=osm_geojson_path, + osm_gpkg_path=osm_gpkg_path, + epsg_utm=study_area_config["utm_epsg"], + output_path=isrm_osm_geojson_path + ) + + # Map BEAM Network with isrm osm intersection + map_beam_network_to_isrm_osm_intersection( + beam_network, + isrm_osm_geojson_path, + output2_path + ) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/main/python/network_validation/run_beam_network_validation_against_npmrds.py b/src/main/python/network_validation/run_beam_network_validation_against_npmrds.py index c459082118c..1d0065bbb2f 100644 --- a/src/main/python/network_validation/run_beam_network_validation_against_npmrds.py +++ b/src/main/python/network_validation/run_beam_network_validation_against_npmrds.py @@ -1,16 +1,46 @@ -import seaborn as sns -from pathlib import Path from validation_utils import * +from pathlib import Path +import sys + +# Get the absolute path to the directory containing this script +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = 
os.path.dirname(os.path.dirname(current_dir)) +sys.path.insert(0, parent_dir) + +from python.utils.study_area_config import get_area_config +from python.utils.study_area_config import generate_network_name -work_dir = os.path.expanduser("~/Workspace/Simulation") # beam run i.e. link stats and events file # study_area = "seattle" study_area = "sfbay" +batch = "calibration" +scenario = "2018-Baseline-20250702-FC05-0" +run_link_speed_validation = True +run_network_speed_validation = True +run_vmt_validation = False + + +config = get_area_config(study_area) +config["network"]["graph_layers"]["residential"]["min_density_per_km2"] = 5500 +study_area_dir = config["work_dir"] +network_name = generate_network_name(config) +network_dir = f'{config["work_dir"]}/network/{network_name}' +run_dir = f"{config["work_dir"]}/beam-runs/{batch}/{scenario}" + +# run_dir = os.path.expanduser("~/Workspace/Simulation/seattle/beam/runs/2024-04-20/Baseline") +batch_label = batch.replace("-", "") +scenario_label = scenario.replace("_", "-") +link_stats = [ + LinkStats(scenario=f"{batch}_{scenario_label}", demand_fraction=0.1, + file_path=os.path.join(run_dir, "3.linkstats.csv.gz")) +] +vehicle_types_files = [( + batch_label, + scenario_label, + study_area_dir + f"/beam-runs/{batch}/{scenario}/0.events.csv.gz", + study_area_dir + f"/beam-freight/{batch}/{scenario}/vehicle-tech/ft-vehicletypes--{batch_label}--{scenario_label}.csv" +)] -study_area_dir = work_dir + "/" + study_area - -# beam_network_mapped_to_npmrds_geo = study_area_dir + '/validation/beam/' + study_area + '_unclassified_simplified_network_mapped_to_npmrds.geojson' -beam_network_mapped_to_npmrds_geo = study_area_dir + '/validation/beam/' + study_area + '_residential_simpl_network_mapped_to_npmrds.geojson' # validation data @@ -18,72 +48,50 @@ # npmrds_data_csv = study_area_dir + '/validation/npmrds/seattle_npmrds_data.csv' # npmrds_hourly_speed_csv = study_area_dir + '/validation/npmrds/seattle_npmrds_hourly_speeds.csv' # 
npmrds_hourly_speed_by_road_class_csv = study_area_dir + '/validation/npmrds/' + study_area + '_npmrds_hourly_speed_by_road_class.csv' -npmrds_station_geo = study_area_dir + '/validation/npmrds/' + study_area + '_npmrds_station.geojson' -npmrds_data_csv = study_area_dir + '/validation/npmrds/' + study_area + '_npmrds_data.csv' -npmrds_hourly_speed_csv = study_area_dir + '/validation/npmrds/' + study_area + '_npmrds_hourly_speeds.csv' -npmrds_hourly_speed_by_road_class_csv = study_area_dir + '/validation/npmrds/' + study_area + '_npmrds_hourly_speed_by_road_class.csv' - - # ########## Initialize -setup = SpeedValidationSetup(npmrds_hourly_speed_csv=npmrds_hourly_speed_csv, - npmrds_hourly_speed_by_road_class_csv=npmrds_hourly_speed_by_road_class_csv, - beam_network_mapped_to_npmrds_geo=beam_network_mapped_to_npmrds_geo) - - -run_link_speed_validation = False -run_network_speed_validation = False +setup = SpeedValidationSetup(npmrds_hourly_speed_csv=f"{run_dir}/{study_area}_npmrds_hourly_speeds.csv", + npmrds_hourly_speed_by_road_class_csv=f"{run_dir}/{study_area}_npmrds_hourly_speed_by_road_class.csv", + beam_network_mapped_to_npmrds_geo=f"{run_dir}/{study_area}_network_mapped_to_npmrds.geojson") -if run_link_speed_validation or run_network_speed_validation: +if run_link_speed_validation or run_network_speed_validation or run_vmt_validation: # The rest is automatically generated - # run_dir = os.path.expanduser("~/Workspace/Simulation/seattle/beam/runs/2024-04-20/Baseline") - run_dir = study_area_dir + "/beam-runs/calibration-jdeqsim/sfbay-calib--rs-101010-netset5__2024-07-09_04-48-50_tww" output_dir = run_dir + '/validation_output' plots_dir = output_dir + '/plots' Path(output_dir).mkdir(parents=True, exist_ok=True) Path(plots_dir).mkdir(parents=True, exist_ok=True) - link_stats = [ - LinkStats(scenario="BEAM_netset1", demand_fraction=0.1, file_path=run_dir + "/12.linkstats.csv.gz") - ] # link_stats = [LinkStats(scenario="BEAM", demand_fraction=0.3, 
file_path=run_dir + "/0.linkstats.csv.gz")] print("Run: " + str(link_stats)) processed_link_stats = setup.process_these_link_stats(link_stats=link_stats, assume_daylight_saving=True) else: processed_link_stats = None + output_dir = '' + plots_dir = '' # ######################################### # ########## Network-level speed validation # ######################################### if run_network_speed_validation: - hourly_speed = setup.get_hourly_average_speed(processed_link_stats) - - # Plot hourly network speed - plt.figure() - sns.lineplot(x='hour', y='speed', hue='scenario', data=hourly_speed, errorbar=('ci', 95)) - plt.ylim([0, 70]) - plt.title("Network-level Speed Validation") - plt.savefig(plots_dir + '/' + study_area + '_beam_npmrds_network_speed_validation.png', dpi=200) - plt.show(block=False) - hourly_speed_by_road_class = setup.get_hourly_average_speed_by_road_class(processed_link_stats) + hourly_speed_by_road_class_no_npmrds = hourly_speed_by_road_class[~hourly_speed_by_road_class['scenario'].str.contains("npmrds", case=False, na=False)] # plot hourly network speed by road class plt.figure() g = sns.relplot(x='hour', y='speed', hue='road_class', col='scenario', kind="line", - data=hourly_speed_by_road_class, + data=hourly_speed_by_road_class_no_npmrds, errorbar=('ci', 95), facet_kws={'sharey': True, 'sharex': True}) g.set_titles("{col_name}") g.fig.suptitle('Network-level Speed Validation by Road Class', fontsize=16, y=0.98) g.set_xlabels("Hour") g.set_ylabels("Speed (mph)") - g._legend.set_title("Road Category") + g.legend.set_title("Road Category") plt.subplots_adjust(top=0.85) plt.ylim([0, 70]) plt.savefig(plots_dir + '/' + study_area + '_beam_npmrds_network_speed_road_class_validation.png', dpi=200) plt.show(block=False) - hourly_speed_by_road_class.to_csv(output_dir + '/' + study_area + '_beam_npmrds_network_speed_road_class_validation.csv', index=False) - + hourly_speed_by_road_class_no_npmrds.to_csv( + output_dir + '/' + study_area + 
'_beam_npmrds_network_speed_road_class_validation.csv', index=False) # ###################################### # ########## Link-level speed validation # ###################################### @@ -110,17 +118,49 @@ g.fig.suptitle('Link-Level Speed Validation by Road Class', fontsize=16, y=0.98) g.set_xlabels("Hour") g.set_ylabels("Speed (mph)") - g._legend.set_title("Road Category") + g.legend.set_title("Road Category") plt.subplots_adjust(top=0.85) plt.ylim([0, 70]) plt.savefig(plots_dir + '/' + study_area + '_beam_npmrds_link_speed_road_class_validation.png', dpi=200) plt.show(block=False) - hourly_link_speed_by_road_class.to_csv(output_dir + '/' + study_area + '_beam_npmrds_link_speed_road_class_validation.csv', index=False) - + hourly_link_speed_by_road_class.to_csv( + output_dir + '/' + study_area + '_beam_npmrds_link_speed_road_class_validation.csv', index=False) # Plot average link speed by link # average_link_speed = setup.get_average_link_speed() # average_link_speed.to_csv(output_dir + '/' + study_area + '_average_link_speed.csv', index=False) + +# if run_vmt_validation: +# pts = pd.DataFrame() +# # Read vehicle types +# for (batch, scenario, events_file, veh_types_file) in vehicle_types_files: +# # Read and process freight events +# run = read_events( +# events_file, +# veh_types_file, +# batch, +# scenario +# ) +# pt = get_ft_path_traversals(run) +# pts = pd.concat([pts, pt]) +# +# # Process baseline data +# baseline_summary, baseline_summary_levels, baseline_summary_colors = process_ft_path_traversals(baseline_runs, +# baseline_runs_name, +# baseline_output_dir) +# +# # Validate VMT +# validation = validate_vmt(baseline_summary, WORK_DIR) +# +# # Create plots +# plot_results( +# baseline_summary, +# validation, +# baseline_summary_colors, +# baseline_output_dir, +# "2024-08-07" +# ) + print("END") diff --git a/src/main/python/network_validation/validation_utils.py b/src/main/python/network_validation/validation_utils.py index 42ffa479c61..8117c092a87 
100644 --- a/src/main/python/network_validation/validation_utils.py +++ b/src/main/python/network_validation/validation_utils.py @@ -1,11 +1,18 @@ +import json +import os import time +import zipfile +from urllib.request import urlretrieve +import contextily as ctx import geopandas as gpd import matplotlib.pyplot as plt import numpy as np +import osmnx as ox import pandas as pd import pyarrow.csv as pv -import os +import requests +import seaborn as sns plt.style.use('ggplot') meter_to_mile = 0.000621371 @@ -20,20 +27,18 @@ 7.0: 'Local'} roadclass_to_fsystem_lookup = {value: key for key, value in fsystem_to_roadclass_lookup.items()} beam_to_roadclass_lookup = {'motorway': fsystem_to_roadclass_lookup[1.0], - 'motorway_link': fsystem_to_roadclass_lookup[2.0], + 'motorway_link': fsystem_to_roadclass_lookup[2.0], # Links connect to lower classes 'trunk': fsystem_to_roadclass_lookup[2.0], - 'trunk_link': fsystem_to_roadclass_lookup[2.0], + 'trunk_link': fsystem_to_roadclass_lookup[3.0], # One class down 'primary': fsystem_to_roadclass_lookup[3.0], - 'primary_link': fsystem_to_roadclass_lookup[4.0], + 'primary_link': fsystem_to_roadclass_lookup[4.0], # One class down 'secondary': fsystem_to_roadclass_lookup[4.0], - 'secondary_link': fsystem_to_roadclass_lookup[5.0], + 'secondary_link': fsystem_to_roadclass_lookup[5.0], # One class down 'tertiary': fsystem_to_roadclass_lookup[5.0], - 'tertiary_link': fsystem_to_roadclass_lookup[6.0], + 'tertiary_link': fsystem_to_roadclass_lookup[6.0], # One class down 'unclassified': fsystem_to_roadclass_lookup[6.0], - 'residential': fsystem_to_roadclass_lookup[7.0], - 'living_street': fsystem_to_roadclass_lookup[7.0], - 'road': fsystem_to_roadclass_lookup[7.0], - np.nan: fsystem_to_roadclass_lookup[7.0]} + 'residential': fsystem_to_roadclass_lookup[7.0] + } state_fips_to_code = { '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA', '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL', @@ -91,7 +96,6 @@ def 
agg_npmrds_to_hourly_speed(npmrds_data, observed_speed_weight): def process_and_extend_link_stats(model_network, link_stats, assume_daylight_savings): - dfs = [] for link_stat in link_stats: df = pv.read_csv(link_stat.file_path).to_pandas() @@ -199,7 +203,8 @@ def process_regional_npmrds_station(region_boundary, npmrds_geo_file, npmrds_sce print(">> Select TMC within region boundaries") regional_npmrds_station_out = gpd.overlay(npmrds_station_proj, region_boundary, how='intersection') regional_npmrds_station_out['scenario'] = npmrds_scenario_label - regional_npmrds_station_out.loc[:, 'road_class'] = regional_npmrds_station_out.loc[:, 'F_System'].map(fsystem_to_roadclass_lookup) + regional_npmrds_station_out.loc[:, 'road_class'] = regional_npmrds_station_out.loc[:, 'F_System'].map( + fsystem_to_roadclass_lookup) regional_npmrds_station_out.rename(columns={'Tmc': 'tmc'}, inplace=True) return regional_npmrds_station_out @@ -261,127 +266,238 @@ def run_hourly_speed_mapping_by_road_class(npmrds_hourly_link_speed, link_stats) return pd.concat([beam_hourly_speed, npmrds_hourly_speed], axis=0) -def download_taz_shapefile(state_fips_code, year, output_dir): - import requests +def download_nhts_data(nhts_output_file, area_name, state_fips_code=None, + cbsa_codes=None, year=2017, download=True, extract=True, process=True): """ - Download TAZ shapefiles for a given state-level FIPS code. + Download, extract, and process NHTS data with filtering by state FIPS code + and/or CBSA codes. + Stores filtered data under directory with area name: data_nhts_dir/area_name/ Parameters: - - fips_code: String or integer representing the state-level FIPS code. - - output_dir: Directory to save the downloaded ZIP file. 
+ - nhts_output_file: Path to save the downloaded NHTS zip file + - area_name: Name of the area for organizing filtered data + - state_fips_code: String representing the state FIPS code (e.g., '06' for California) + - cbsa_codes: List of CBSA codes (e.g., [41860] for San Francisco-Oakland-Hayward, CA) + - year: NHTS survey year (default: 2017) + - download: Boolean to control if download should occur + - extract: Boolean to control if extraction should occur + - process: Boolean to control if processing should occur + + Returns: + - Dictionary of filtered DataFrames """ - # Ensure the FIPS code is a string, padded to 2 characters - fips_code_str = str(state_fips_code).zfill(2) - - # Construct the download URL - base_url = f"https://www2.census.gov/geo/tiger/TIGER2010/TAZ/2010/" - filename = f"tl_{year}_{fips_code_str}_taz10.zip" - download_url = base_url + filename - - # Make the output directory if it doesn't exist - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # Full path for saving the file - output_path = os.path.join(output_dir, filename) - - # Start the download - print(f"Downloading TAZ shapefile for FIPS code {state_fips_code} from {download_url}") - try: - response = requests.get(download_url) - response.raise_for_status() # This will check for errors - - # Write the content of the response to a ZIP file - with open(output_path, 'wb') as file: - file.write(response.content) - - print(f"File saved to {output_path}") - - except requests.RequestException as e: - print(f"Error downloading the file: {e}") - - return output_path - - -def collect_taz_boundaries(state_fips_code, year, output_dir): - from zipfile import ZipFile - state_geo_zip = output_dir + f"/tl_{year}_{state_fips_code}_taz10.zip" - if not os.path.exists(state_geo_zip): - state_geo_zip = download_taz_shapefile(state_fips_code, year, output_dir) - """ - Read a shapefile from a ZIP archive, filter geometries by county FIPS codes, - and write the result to a GeoJSON file. 
- - Parameters: - - zip_file_path: Path to the ZIP file containing the shapefile. - - county_fips_codes: List of county FIPS codes to filter by. - - output_geojson_path: Path to save the filtered data as a GeoJSON file. - """ - # Extract the shapefile from the ZIP archive - with ZipFile(state_geo_zip, 'r') as zip_ref: - # Extract all files to a temporary directory - temp_dir = "temp_shp" - zip_ref.extractall(temp_dir) - - # Find the .shp file in the extracted files - shapefile_name = [f for f in os.listdir(temp_dir) if f.endswith('.shp')][0] - shapefile_path = os.path.join(temp_dir, shapefile_name) - - # Read the shapefile into a GeoDataFrame - gdf = gpd.read_file(shapefile_path) - - # Clean up the temporary directory - for filename in os.listdir(temp_dir): - os.remove(os.path.join(temp_dir, filename)) - os.rmdir(temp_dir) - - return gdf - - -def collect_geographic_boundaries(state_fips_code, county_fips_codes, year, study_area_geo_path, projected_coordinate_system, geo_level): - from pygris import counties, block_groups - - if geo_level == 'county': - # Define fips code for selected counties - geo_data = counties(state=state_fips_code, year=year, cb=True, cache=True) - elif geo_level == 'cbg': - # Define fips code for selected counties - geo_data = block_groups(state=state_fips_code, year=year, cb=True, cache=True) - elif geo_level == 'taz': - geo_data = collect_taz_boundaries(state_fips_code, year, os.path.dirname(study_area_geo_path)) + # Set URL based on year + if year >= 2016: + url = "https://nhts.ornl.gov/assets/2016/download/csv.zip" else: - raise ValueError("Unsupported geographic level. 
Choose 'counties' or 'cbgs'.") - - countyfp_columns = [col for col in geo_data.columns if col.startswith('COUNTYFP')] - mask = geo_data[countyfp_columns].apply(lambda x: x.isin(county_fips_codes)).any(axis=1) - selected_geo = geo_data[mask] - - # def string_to_double(s): - # return float(s if s != "" else "0") - # - # # Prepare columns and mask - # aland_columns = [col for col in selected_geo.columns if col.startswith('ALAND')] - # awater_columns = [col for col in selected_geo.columns if col.startswith('AWATER')] - # for col in aland_columns + awater_columns: - # selected_geo.loc[:, col] = selected_geo[col].apply(string_to_double) - # mask = pd.Series([False] * len(selected_geo), index=selected_geo.index) - # - # for aland_col, awater_col in zip(aland_columns, awater_columns): - # # AWATER should not be more than three times ALAND - # mask |= (selected_geo[aland_col] > 0) & (selected_geo[awater_col] < 3 * selected_geo[aland_col]) - # - # # Apply the mask to filter selected_geo - # selected_geo = selected_geo[mask] - - base_name, extension = os.path.splitext(study_area_geo_path) - - study_area_geo_projected_path = base_name+"_epsg"+str(projected_coordinate_system)+extension - selected_geo.to_crs(epsg=projected_coordinate_system).to_file(study_area_geo_projected_path, driver="GeoJSON") - - selected_geo_wgs84 = selected_geo.to_crs(epsg=4326) - selected_geo_wgs84.to_file(base_name+"_wgs84"+extension, driver="GeoJSON") - return selected_geo_wgs84 - + print(f"Error: NHTS data for year {year} is not supported.") + return None + + data_nhts_dir = os.path.dirname(nhts_output_file) + + # Create area-specific directory + area_dir = os.path.join(data_nhts_dir, area_name) + os.makedirs(area_dir, exist_ok=True) + print(f"Created directory for area: {area_dir}") + + # Create a filter description for file naming + filter_desc = "" + if state_fips_code: + filter_desc += f"fips_{state_fips_code}" + if cbsa_codes: + filter_desc += f"_cbsa_{'_'.join(map(str, cbsa_codes))}" + + # Save 
filter information to a JSON file for reference + filter_info = { + "area_name": area_name, + "state_fips_code": state_fips_code, + "cbsa_codes": cbsa_codes, + "year": year, + "timestamp": time.strftime("%Y-%m-%d %H:%M:%S") + } + + with open(os.path.join(area_dir, "filter_info.json"), "w") as f: + json.dump(filter_info, f, indent=2) + + # Check if the file already exists + if os.path.exists(nhts_output_file): + file_size = os.path.getsize(nhts_output_file) / (1024 * 1024) # Size in MB + print(f"File {nhts_output_file} already exists ({file_size:.1f} MB). Skipping download.") + else: + print(f"Downloading NHTS {year} data...") + # Download the file with progress reporting + response = requests.get(url, stream=True) + if response.status_code == 200: + total_size = int(response.headers.get('content-length', 0)) + downloaded = 0 + start_time = time.time() + + with open(nhts_output_file, "wb") as file: + for chunk in response.iter_content(chunk_size=1024 * 1024): # 1MB chunks + if chunk: + file.write(chunk) + downloaded += len(chunk) + + # Calculate and display progress + percent = int(100 * downloaded / total_size) if total_size > 0 else 0 + elapsed = time.time() - start_time + rate = downloaded / (1024 * 1024 * elapsed) if elapsed > 0 else 0 + + print( + f"\rDownloading: {percent}% ({downloaded / (1024 * 1024):.1f}MB of {total_size / (1024 * 1024):.1f}MB) at {rate:.1f} MB/s", + end="") + + print(f"\nDownloaded {nhts_output_file}") + else: + print(f"Failed to download. 
Status code: {response.status_code}") + print(f"Response: {response.text[:500]}...") + return None + + # Create a temporary directory for extraction + temp_extract_dir = os.path.join(data_nhts_dir, "temp_extract") + os.makedirs(temp_extract_dir, exist_ok=True) + + # Check if data has already been extracted to temp directory + extracted_files_exist = os.path.exists(f"{temp_extract_dir}/hhpub.csv") or os.path.exists( + f"{temp_extract_dir}/trippub.csv") + + if not extracted_files_exist and extract: + # Extract the downloaded ZIP file to temp directory + print("\nExtracting files to temporary directory...") + try: + with zipfile.ZipFile(nhts_output_file, "r") as zip_ref: + zip_ref.extractall(temp_extract_dir) + print("Files extracted successfully") + except zipfile.BadZipFile: + print("Error: The downloaded file is not a valid ZIP file.") + print("The file may be corrupted. Please try downloading again.") + return None + except Exception as e: + print(f"Error extracting files: {str(e)}") + return None + elif extract: + extract_again = input("Data files already exist in temp directory. Extract again? 
(y/n): ").lower() == 'y' + if extract_again: + print("\nExtracting files to temporary directory...") + try: + with zipfile.ZipFile(nhts_output_file, "r") as zip_ref: + zip_ref.extractall(temp_extract_dir) + print("Files extracted successfully") + except Exception as e: + print(f"Error extracting files: {str(e)}") + return None + else: + print("Skipping extraction.") + else: + print("Skipping extraction.") + + # List the extracted files + files = os.listdir(temp_extract_dir) + print(f"\nFiles in temporary extraction directory: {len(files)} files") + + # Process key datasets with focus on filtered areas + datasets = { + "Households": "hhpub.csv", + "Persons": "perpub.csv", + "Trips": "trippub.csv", + "Vehicles": "vehpub.csv" + } + + filtered_dfs = {} + + if not process: + print("Skipping data processing as requested.") + return None + + for dataset_name, filename in datasets.items(): + # Define output path in the area-specific directory + area_output_file = os.path.join(area_dir, filename) + + # Check if filtered file already exists in area directory + if os.path.exists(area_output_file): + process_this = input( + f"Filtered {dataset_name} data already exists in {area_name} directory. Process again? (y/n): ").lower() == 'y' + if not process_this: + filtered_dfs[dataset_name] = pd.read_csv(area_output_file) + print(f"Loaded existing filtered {dataset_name} data from {area_name} directory.") + continue + + if filename in files: + print(f"\nProcessing {dataset_name} dataset...") + file_path = os.path.join(temp_extract_dir, filename) + + # Load the CSV file + df = pd.read_csv(file_path) + print(f"Total records: {len(df)}") + + # Apply filters + filtered_df = df.copy() + + # Find columns for filtering + # 1. 
Find any column containing the word "FIPS" for state FIPS + state_fips_column = None + state_fips_columns = [col for col in df.columns if 'STFIPS' in col or ('FIPS' in col and 'ST' in col)] + + if state_fips_columns: + state_fips_column = state_fips_columns[0] + print(f"Found state FIPS column: {state_fips_column}") + + # 2. Find any column containing CBSA + cbsa_column = None + cbsa_columns = [col for col in df.columns if 'CBSA' in col] + + if cbsa_columns: + cbsa_column = cbsa_columns[0] + print(f"Found CBSA column: {cbsa_column}") + + # Apply filtering based on available columns and parameters + filter_applied = False + + # 1. Filter by CBSA if provided and column exists + if cbsa_codes and cbsa_column and cbsa_column in df.columns: + filtered_df = filtered_df[filtered_df[cbsa_column].isin(cbsa_codes)] + print(f"Records after CBSA filter: {len(filtered_df)}") + filter_applied = True + + # 2. Filter by state FIPS if provided and column exists + if state_fips_code and state_fips_column and state_fips_column in df.columns: + # Convert to integer for comparison if the column is numeric + if pd.api.types.is_numeric_dtype(filtered_df[state_fips_column]): + filtered_df = filtered_df[filtered_df[state_fips_column] == int(state_fips_code)] + else: + # Otherwise treat as string + filtered_df[state_fips_column] = filtered_df[state_fips_column].astype(str) + filtered_df = filtered_df[filtered_df[state_fips_column] == state_fips_code] + print(f"Records after state FIPS filter: {len(filtered_df)}") + filter_applied = True + + if not filter_applied: + print("Warning: No filters applied. 
No matching columns found for the provided filter criteria.") + print(f"Available columns: {', '.join(df.columns[:10])}...") + + # Save filtered data to area-specific directory + filtered_df.to_csv(area_output_file, index=False) + print(f"Filtered data saved to {area_output_file}") + + # Store in dictionary + filtered_dfs[dataset_name] = filtered_df + + # Display sample data + print("\nSample data (first 3 rows):") + print(filtered_df.head(3)) + + # Display column information + print(f"\nNumber of columns: {len(filtered_df.columns)}") + print(f"Sample columns: {filtered_df.columns[:5].tolist()}") + else: + print(f"\nWarning: {filename} not found in extracted files") + + # Optionally clean up temporary extraction directory + print("Cleaning up temporary extraction directory") + import shutil + shutil.rmtree(temp_extract_dir) + print(f"Removed temporary directory: {temp_extract_dir}") + return filtered_dfs def map_cbg_to_taz(cbg_gdf, cbg_id_col, taz_gdf, taz_id_col, projected_coordinate_system, cbg_taz_map_csv): print(f"Mapping CBG to TAZ geometries") @@ -429,7 +545,6 @@ def prepare_npmrds_data( # output npmrds_station_geo, npmrds_data_csv, npmrds_hourly_speed_csv, npmrds_hourly_speed_by_road_class_csv, beam_network_car_links_geo, beam_npmrds_network_map_geo): - if os.path.exists(npmrds_station_geo): print(f"Reading {npmrds_station_geo}") regional_npmrds_station = gpd.read_file(npmrds_station_geo) @@ -443,7 +558,8 @@ def prepare_npmrds_data( regional_npmrds_data = pv.read_csv(npmrds_data_csv).to_pandas() else: print("Process NPMRDS data") - regional_npmrds_data = process_regional_npmrds_data(npmrds_raw_data_csv, npmrds_label, regional_npmrds_station['tmc'].unique()) + regional_npmrds_data = process_regional_npmrds_data(npmrds_raw_data_csv, npmrds_label, + regional_npmrds_station['tmc'].unique()) regional_npmrds_data.to_csv(npmrds_data_csv, index=False) if os.path.exists(npmrds_hourly_speed_csv): @@ -496,7 +612,8 @@ def __repr__(self): class SpeedValidationSetup: - 
def download_h5_data(url: str, output_path: str) -> str:
    """
    Download an H5 data file if it doesn't exist locally, then print its structure.

    The download is written to a temporary ``<output_path>.part`` file and renamed
    to ``output_path`` only after it has completed, so an interrupted download is
    never mistaken for a finished file on the next call.

    Parameters:
    -----------
    url : str
        URL to download the H5 file from
    output_path : str
        Local path to save the downloaded file

    Returns:
    --------
    str
        Path to the H5 file (the ``output_path`` argument)
    """
    import h5py

    # Check if file exists locally first
    if not os.path.exists(output_path):
        print(f"\nDownloading H5 data from {url}...")

        # Make sure the destination directory exists before writing into it.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        partial_path = output_path + ".part"
        try:
            urlretrieve(url, partial_path)
            # Rename into place only once the whole file has been fetched.
            os.replace(partial_path, output_path)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print("✓ H5 data downloaded")
    else:
        print("\nUsing existing H5 data file")

    # Explore H5 file structure
    print("\nExploring H5 file structure...")

    def print_structure(name, obj):
        """Print one visited H5 item: datasets with shape, dtype and a sample; others as groups."""
        if isinstance(obj, h5py.Dataset):
            try:
                shape = obj.shape
                dtype = obj.dtype
                print(f"Dataset: {name}")
                print(f"  Shape: {shape}")
                print(f"  Type: {dtype}")

                # Print at most the first three entries along the first axis
                if len(shape) > 0 and shape[0] > 0:
                    sample_size = min(3, shape[0])
                    print("  Sample data:")
                    print(obj[:sample_size])
            except Exception as e:
                # Best effort: a single unreadable dataset must not abort the walk.
                print(f"  Error reading dataset: {e}")
        else:
            print(f"Group: {name}")

    with h5py.File(output_path, 'r') as f:
        print("\nFile structure:")
        print("==============")
        f.visititems(print_structure)

        # List all root level groups/datasets
        print("\nRoot level items:")
        for key in f.keys():
            print(f"- {key}")
    return output_path
def get_ft_path_traversals(_events):
    """
    Extract freight PathTraversal events and tag them with derived attributes.

    Keeps rows whose ``type`` is ``PathTraversal`` and whose ``vehicle`` starts with
    ``freight``, restricts them to a fixed list of columns, and appends four columns:
    ``energyType`` / ``energyTypeCode`` (set only for vehicle types containing
    ``E-PHEV`` or ``H2FC``, case-insensitive; left missing otherwise),
    ``vehicleCategory`` and ``business``.

    Returns the resulting DataFrame.
    """
    kept_columns = [
        'time', 'type', 'vehicleType', 'vehicle', 'secondaryFuelLevel',
        'primaryFuelLevel', 'driver', 'mode', 'seatingCapacity', 'startX',
        'startY', 'endX', 'endY', 'capacity', 'arrivalTime', 'departureTime',
        'secondaryFuel', 'secondaryFuelType', 'primaryFuelType',
        'numPassengers', 'length', 'primaryFuel', 'runName', 'runLabel',
    ]

    # Narrow down to freight path traversals, then to the columns of interest.
    traversals = _events[_events['type'] == 'PathTraversal'].copy()
    is_freight = traversals['vehicle'].str.startswith('freight', na=False)
    traversals = traversals[is_freight]
    traversals = traversals[kept_columns]

    # Emergency vehicles are not expected among freight vehicles.
    if traversals['vehicle'].str.contains('-emergency-', na=False).any():
        print("This is a bug")

    vehicle_type = traversals['vehicleType']
    vehicle_id = traversals['vehicle']

    # Energy type and code, derived from the vehicle type name.
    is_phev = vehicle_type.str.contains('E-PHEV', case=False, na=False)
    is_h2fc = vehicle_type.str.contains('H2FC', case=False, na=False)
    traversals.loc[is_phev, 'energyType'] = 'Electric'
    traversals.loc[is_phev, 'energyTypeCode'] = 'PHEV'
    traversals.loc[is_h2fc, 'energyType'] = 'Hydrogen'
    traversals.loc[is_h2fc, 'energyTypeCode'] = 'H2FC'

    # Vehicle category: default first, then the two heavy-duty overrides.
    is_tractor = vehicle_type.str.contains('-hdt-', na=False)
    is_heavy_vocational = vehicle_type.str.contains('-hdv-', na=False)
    traversals['vehicleCategory'] = 'Class 4-6 Vocational'
    traversals.loc[is_tractor, 'vehicleCategory'] = 'Class 7&8 Tractor'
    traversals.loc[is_heavy_vocational, 'vehicleCategory'] = 'Class 7&8 Vocational'

    # Business type: B2B unless the vehicle id carries the b2c prefix.
    is_b2c = vehicle_id.str.startswith('freightVehicle-b2c-', na=False)
    traversals['business'] = 'B2B'
    traversals.loc[is_b2c, 'business'] = 'B2C'

    print("PT formatted")
    return traversals
def process_ft_path_traversals(_runs, _batch, _output_dir, _expansion_factor, _validation=None):
    """
    Summarise freight travel (MVMT) and energy (GWH) for one batch, save it and plot it.

    Rows of ``_runs`` whose ``batch`` equals ``_batch`` are grouped by
    ``energyTypeCode``, ``vehicleClass``, ``business``, ``batch`` and ``scenario``.
    Within each group ``length`` and ``primaryFuel`` are summed and scaled by
    ``_expansion_factor``.

    Parameters:
    - _runs: DataFrame with the grouping columns above plus ``length`` and ``primaryFuel``
    - _batch: batch identifier to select; also used as the output file name prefix
    - _output_dir: existing directory receiving
      ``<_batch>_VMT-and-GWH-by-powertrain-class.csv`` (and the plots)
    - _expansion_factor: multiplier applied to both aggregated quantities
    - _validation: optional validation DataFrame forwarded to ``plot_results``.
      When omitted, a module-level ``validation`` variable is used if one exists;
      otherwise plotting is skipped.

    Returns:
    - Summary DataFrame with columns ``energyTypeCode``, ``vehicleClass``, ``business``,
      ``batch``, ``runLabel`` (the scenario), ``MVMT``, ``GWH`` and
      ``energyAndVehiclesTypes``.
    """
    group_keys = ['energyTypeCode', 'vehicleClass', 'business', 'batch', 'scenario']
    batch_runs = _runs[_runs["batch"] == _batch]

    runs_summary = batch_runs.groupby(group_keys)[['length', 'primaryFuel']].sum().reset_index()
    # 1609.344 m per mile, then millions -> MVMT
    runs_summary['length'] = _expansion_factor * runs_summary['length'] / 1609.344 / 1e6
    # 3.6e12 J per GWh -> GWH (assumes primaryFuel is in joules -- TODO confirm)
    runs_summary['primaryFuel'] = _expansion_factor * runs_summary['primaryFuel'] / 3.6e12

    # Rename by name instead of assigning a positional list: the frame has seven
    # columns (five keys + two aggregates), so a six-name list cannot be assigned.
    runs_summary = runs_summary.rename(
        columns={'scenario': 'runLabel', 'length': 'MVMT', 'primaryFuel': 'GWH'}
    )

    # Create energy and vehicles types column
    runs_summary['energyAndVehiclesTypes'] = runs_summary['energyTypeCode'] + ' ' + runs_summary['vehicleClass']

    # Convert to categorical with specified order.
    # NOTE: combinations not listed here become missing values in this column.
    runs_summary['energyAndVehiclesTypes'] = pd.Categorical(
        runs_summary['energyAndVehiclesTypes'],
        categories=[
            "Diesel Class 4-6 Vocational",
            "Diesel Class 7&8 Vocational",
            "Diesel Class 7&8 Tractor",
            "BEV Class 7&8 Vocational"
        ]
    )

    # Save summary to CSV
    runs_summary.to_csv(
        os.path.join(_output_dir, f"{_batch}_VMT-and-GWH-by-powertrain-class.csv"),
        index=False
    )

    # Keep working for callers that relied on a module-level ``validation``.
    if _validation is None:
        _validation = globals().get('validation')

    if _validation is not None:
        plot_results(runs_summary,
                     _validation,
                     ["azure3", "darkgray", "azure4", "deepskyblue2"],
                     _output_dir,
                     _batch)
    else:
        print("No validation data provided; skipping plots.")

    return runs_summary
def validate_vmt(baseline_summary, work_dir):
    """
    Build a validation table comparing baseline MVMT with HPMS-derived MVMT.

    Reads the HPMS GeoJSON under ``work_dir``, keeps links whose ``GEOID`` starts
    with one of four fixed prefixes, and passes them to ``get_hpms_aadt``. The
    summed ``VMT_hpms`` is split into National (68%), International (22%) and
    Through Traffic (10%) shares, expressed in millions.

    The ``FAMOS`` rows carry the summed ``MVMT`` of the ``baseline_summary`` rows
    whose ``runLabel`` is ``"Baseline"`` under National, and 0.0 for the other two.

    Returns a DataFrame with columns ``label``, ``source`` (ordered categorical)
    and ``MVMT``.
    """
    hpms_path = os.path.join(work_dir, "validation_data/HPMS/WA_HPMS_with_GEOID_LANEMILE.geojson")
    geoid_prefixes = ('53061', '53033', '53035', '53053')

    # Read and process HPMS data
    hpms_links = gpd.read_file(hpms_path)
    in_study_area = hpms_links['GEOID'].str.startswith(geoid_prefixes)
    hpms_stats = get_hpms_aadt(hpms_links[in_study_area])
    total_hpms_vmt = hpms_stats['VMT_hpms'].sum()

    is_baseline = baseline_summary['runLabel'] == "Baseline"
    famos_mvmt = baseline_summary[is_baseline]['MVMT'].sum()

    # HPMS components, in millions of vehicle miles
    hpms_national = (total_hpms_vmt * 0.68) / 1e6
    hpms_international = (total_hpms_vmt * 0.22) / 1e6
    hpms_through = (total_hpms_vmt * 0.1) / 1e6

    sources = ['National', 'International', 'Through Traffic']
    validation = pd.DataFrame({
        'label': ['FAMOS'] * 3 + ['HPMS'] * 3,
        'source': sources * 2,
        'MVMT': [famos_mvmt, 0.0, 0.0, hpms_national, hpms_international, hpms_through]
    })
    validation['source'] = pd.Categorical(
        validation['source'],
        categories=['Through Traffic', 'International', 'National']
    )

    return validation
f"{baseline_runs_name}_vmt_validation.png")) + plt.close() + + # Plot VMT by powertrain class + plt.figure(figsize=(7, 4)) + g = sns.barplot( + data=baseline_summary, + x='runLabel', + y='MVMT', + hue='energyAndVehiclesTypes', + palette=baseline_summary_colors + ) + plt.title('Total Truck Travel - Baseline') + plt.xlabel('Scenario') + plt.ylabel('VMT') + plt.xticks(rotation=0) + plt.savefig(os.path.join(baseline_output_dir, f"{baseline_runs_name}_VMT-by-powertrain-class.png")) + plt.close() + + # Plot Energy consumption + plt.figure(figsize=(7, 4)) + g = sns.barplot( + data=baseline_summary, + x='runLabel', + y='GWH', + hue='energyAndVehiclesTypes', + palette=baseline_summary_colors + ) + plt.title('Energy Consumption - Baseline') + plt.xlabel('Scenario') + plt.ylabel('GWh') + plt.xticks(rotation=0) + plt.savefig(os.path.join(baseline_output_dir, f"{baseline_runs_name}_GWH-by-powertrain-class.png")) + plt.close() diff --git a/src/main/python/utils/__init__.py b/src/main/python/utils/__init__.py new file mode 100644 index 00000000000..3341dca4bad --- /dev/null +++ b/src/main/python/utils/__init__.py @@ -0,0 +1,7 @@ +# python/utils/__init__.py + +# Import and expose the entire helper module +from . import compare_config_files +from . import log_filter_script +from . import study_area_config +from . 
import files_utils diff --git a/src/main/python/compare_config_files.py b/src/main/python/utils/compare_config_files.py similarity index 100% rename from src/main/python/compare_config_files.py rename to src/main/python/utils/compare_config_files.py diff --git a/src/main/python/utils/csv_data_analyzer.py b/src/main/python/utils/csv_data_analyzer.py new file mode 100644 index 00000000000..1e0450bee43 --- /dev/null +++ b/src/main/python/utils/csv_data_analyzer.py @@ -0,0 +1,262 @@ +import gzip +import sys +from collections import Counter +import pandas as pd + + +def parse_hstore_format(data_string): + """ + Parse fields that contain hstore format data + Format example: "oneway"=>"no","reversed"=>"False","length"=>"72.674",... + """ + if pd.isna(data_string) or not data_string: + return {} + + result = {} + # Handle the standard hstore format + try: + # Split by commas not inside quotes + parts = [] + in_quotes = False + current = "" + + for char in data_string: + if char == '"' and (not current or current[-1] != '\\'): + in_quotes = not in_quotes + + if char == ',' and not in_quotes: + parts.append(current) + current = "" + else: + current += char + + # Don't forget the last part + if current: + parts.append(current) + + # Process each part + for part in parts: + if "=>" in part: + key_val = part.split("=>") + if len(key_val) == 2: + key = key_val[0].strip().strip('"') + val = key_val[1].strip().strip('"') + result[key] = val + except Exception as e: + print(f"Error parsing tags: {e}") + if len(data_string) > 100: + print(f"Preview: {data_string[:100]}...") + else: + print(f"String: {data_string}") + + return result + + +def analyze_csv(file_path, sample_size=None): + """ + Analyze a CSV file (possibly gzipped) with OSM data + + Args: + file_path: Path to the CSV or CSV.GZ file + sample_size: Optional number of rows to sample (for large files) + + Returns: + Dictionary of statistics and DataFrame + """ + print(f"Analyzing file: {file_path}") + + # Determine if the 
file is gzipped + is_gzipped = file_path.endswith('.gz') + + try: + # Read the file (with optional sampling) + if is_gzipped: + if sample_size: + # For very large files, we use chunking + chunks = [] + with gzip.open(file_path, 'rt') as f: + # Read and process in chunks + for chunk in pd.read_csv(f, chunksize=min(100000, sample_size)): + chunks.append(chunk) + if sum(len(c) for c in chunks) >= sample_size: + break + df = pd.concat(chunks) + df = df.head(sample_size) + else: + # Read the entire file + with gzip.open(file_path, 'rt') as f: + df = pd.read_csv(f) + else: + # Regular CSV file + if sample_size: + df = pd.read_csv(file_path, nrows=sample_size) + else: + df = pd.read_csv(file_path) + + print(f"Loaded {len(df)} rows with {len(df.columns)} columns") + + # Get basic column statistics + column_stats = {} + for column in df.columns: + non_null_count = df[column].count() + unique_count = df[column].nunique() + + # Calculate top values + value_counts = df[column].value_counts().head(10).to_dict() + + column_stats[column] = { + 'count': non_null_count, + 'percent_present': round(non_null_count / len(df) * 100, 2), + 'unique_values': unique_count, + 'top_values': value_counts + } + + # Look for and parse hstore-formatted fields + other_tags_stats = {} + if 'other_tags' in df.columns: + print("Found 'other_tags' column, parsing nested data...") + + # Create a new column with parsed data + df['other_tags_parsed'] = df['other_tags'].apply(parse_hstore_format) + + # Extract all unique keys from other_tags + all_keys = set() + for tags_dict in df['other_tags_parsed'].dropna(): + if isinstance(tags_dict, dict): + all_keys.update(tags_dict.keys()) + + # For each key, collect statistics + for key in all_keys: + # Count occurrences and values + counter = Counter() + valid_entries = 0 + + for tags_dict in df['other_tags_parsed'].dropna(): + if isinstance(tags_dict, dict) and key in tags_dict: + counter[tags_dict[key]] += 1 + valid_entries += 1 + + other_tags_stats[key] = { + 
'count': valid_entries, + 'percent_present': round(valid_entries / len(df) * 100, 2), + 'unique_values': len(counter), + 'top_values': dict(counter.most_common(10)) + } + + # Sort by frequency + other_tags_stats = {k: v for k, v in sorted( + other_tags_stats.items(), + key=lambda item: item[1]['count'], + reverse=True + )} + + # Create summary statistics + stats = { + 'total_rows': len(df), + 'columns': list(df.columns), + 'column_stats': column_stats, + 'other_tags_stats': other_tags_stats + } + + return stats, df + + except Exception as e: + print(f"Error analyzing CSV file: {e}") + return None, None + + +def print_column_stats(stats, limit=None): + """Print column statistics in a formatted way""" + columns = list(stats['column_stats'].keys()) + + print(f"\n=== Column Statistics ({len(columns)} columns) ===") + + for i, column in enumerate(columns): + if limit and i >= limit: + print(f"\n... and {len(columns) - limit} more columns.") + break + + data = stats['column_stats'][column] + print(f"\n{i + 1}. {column}: {data['count']} non-null values ({data['percent_present']}% filled)") + print(f" Unique values: {data['unique_values']}") + + if data['unique_values'] <= 20: # Only show all values for categorical columns + print(" Values:") + for val, count in data['top_values'].items(): + val_display = str(val) + if len(val_display) > 50: + val_display = val_display[:47] + "..." + print(f" - {val_display}: {count}") + else: + print(" Top values:") + for val, count in data['top_values'].items(): + val_display = str(val) + if len(val_display) > 50: + val_display = val_display[:47] + "..." 
def main(file_path, sample_size=None):
    """
    Analyze a CSV file and print the column and nested-field statistics.

    Args:
        file_path: Path to the CSV or CSV.GZ file
        sample_size: Optional number of rows to sample. Accepts an int or a
            string of decimal digits (as received from the command line);
            any other value means "read the whole file".

    Returns:
        ``(stats, df)`` as returned by ``analyze_csv``, or None when the
        analysis failed.
    """
    # Normalise sample size: command-line callers pass strings, programmatic
    # callers may pass an int directly.
    if isinstance(sample_size, str):
        text = sample_size.strip()
        # isdecimal() only accepts characters that int() can convert.
        sample_size = int(text) if text.isdecimal() else None
    elif isinstance(sample_size, bool) or not isinstance(sample_size, int) or sample_size < 0:
        sample_size = None

    # Analyze the file
    stats, df = analyze_csv(file_path, sample_size)

    if not stats:
        print("Analysis failed.")
        return

    print(f"\n=== CSV Analysis Summary ===")
    print(f"Total rows: {stats['total_rows']}")
    print(f"Total columns: {len(stats['columns'])}")

    # Print column statistics
    print_column_stats(stats, limit=20)

    # Print nested field statistics
    print_nested_field_stats(stats, limit=20)

    return stats, df
def fast_df_to_gzip(df, output_file, compression_level=5, chunksize=100000):
    """
    Write a pandas DataFrame to a compressed CSV.gz file quickly with a progress bar.

    The header and every chunk are written through ``DataFrame.to_csv``, so column
    names get the same quoting as the data and non-string column labels are
    supported.

    :param df: pandas DataFrame to write
    :param output_file: path to the output .csv.gz file
    :param compression_level: gzip compression level (1-9, 9 being highest)
    :param chunksize: number of rows to write at a time (must be positive)
    :raises ValueError: if ``chunksize`` is not positive
    """
    if chunksize <= 0:
        raise ValueError(f"chunksize must be positive, got {chunksize}")

    total_rows = len(df)

    # newline='' leaves line endings to pandas so they are not translated twice;
    # utf-8 matches the default that pandas uses when reading CSV files.
    with gzip.open(output_file, 'wt', compresslevel=compression_level,
                   encoding='utf-8', newline='') as gz_file:
        # Write header (zero-row slice keeps only the column labels)
        df.iloc[:0].to_csv(gz_file, index=False)

        # Write data in chunks
        with tqdm(total=total_rows, desc="Writing to gzip", unit="rows") as pbar:
            for start in range(0, total_rows, chunksize):
                end = min(start + chunksize, total_rows)
                df.iloc[start:end].to_csv(gz_file, index=False, header=False)
                pbar.update(end - start)
def check_files(paths, delete=True):
    """
    Check whether the given files exist, optionally deleting the ones that do.

    ``paths`` may be a single path string or an iterable of paths. With
    ``delete=True`` every existing regular file is removed and counted as absent.
    With ``delete=False`` nothing is removed.

    Returns True only when every path counts as present, i.e. ``delete`` is False
    and all paths are existing regular files. An empty ``paths`` also returns True.
    """
    path_list = [paths] if isinstance(paths, str) else paths

    outcomes = []
    for candidate in path_list:
        present = os.path.isfile(candidate)
        if present and delete:
            # A file that has just been removed no longer counts as present.
            os.remove(candidate)
            outcomes.append(False)
            continue
        outcomes.append(present and not delete)

    # Evaluated after the loop so that every path is processed (and deleted).
    return all(outcomes)
def filter_log_file(input_file, output_file):
    """
    Copy a log file while dropping two kinds of repetitive ERROR lines.

    Lines removed:
      * ``IntHashGrid - Visiting too many spatial index cells.``
      * ``GenericFreightReader - Following freight carrier row discarded because tour
        <id> was filtered out: {...}`` (the tour id is tallied for the summary)

    Every other line is written to ``output_file`` unchanged. A summary of what was
    filtered is printed to stdout.

    Args:
        input_file (str): Path to input log file
        output_file (str): Path to output filtered log file

    Raises:
        SystemExit: with status 1 if the input is missing, access is denied, input and
            output are the same file, or any other error occurs while filtering.
    """
    # Opening the output in 'w' mode truncates it; if it were the input file itself,
    # the log would be emptied before a single line had been read.
    if Path(input_file).resolve() == Path(output_file).resolve():
        print("Error: Output file must be different from input file", file=sys.stderr)
        sys.exit(1)

    try:
        # Patterns to match the different error messages
        spatial_pattern = re.compile(
            r'.*ERROR com\.conveyal\.r5\.streets\.IntHashGrid - Visiting too many spatial index cells\.')
        freight_pattern = re.compile(
            r'.*ERROR b\.a\.a\.f\.input\.GenericFreightReader - Following freight carrier row discarded because tour ([\w-]+) was filtered out: \{.*\}')

        spatial_count = 0
        freight_count = 0
        total_count = 0

        # How many discarded rows were seen per tour id
        filtered_tours = Counter()

        # errors='replace': one stray non-UTF-8 byte in a large log should not abort the run.
        with open(input_file, 'r', encoding='utf-8', errors='replace') as infile, \
                open(output_file, 'w', encoding='utf-8') as outfile:

            for line in infile:
                total_count += 1

                if spatial_pattern.match(line):
                    spatial_count += 1
                    continue

                freight_match = freight_pattern.match(line)
                if freight_match:
                    freight_count += 1
                    filtered_tours[freight_match.group(1)] += 1
                    continue

                # Anything that is not one of the two noisy messages is kept verbatim
                outfile.write(line)

        filtered_total = spatial_count + freight_count

        print("\nProcessing complete:")
        print(f"Total lines processed: {total_count}")
        print("\nFiltered messages:")
        print(f"- Spatial index errors: {spatial_count}")
        print(f"- Freight carrier messages: {freight_count}")
        print(f"Total lines filtered: {filtered_total}")
        print(f"Lines remaining: {total_count - filtered_total}")

        if filtered_tours:
            print("\nFiltered tours breakdown:")
            print("Tour ID\t\tCount")
            print("-" * 30)
            for tour_id, count in sorted(filtered_tours.items()):
                print(f"{tour_id}\t\t{count}")

            print(f"\nTotal unique tours filtered: {len(filtered_tours)}")

        print(f"\nFiltered log saved to: {output_file}")

    except FileNotFoundError:
        print(f"Error: Could not find input file '{input_file}'", file=sys.stderr)
        sys.exit(1)
    except PermissionError:
        print("Error: Permission denied when accessing files", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    # Get input file path from command line argument or use default
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
    else:
        input_file = os.path.expanduser("~/Downloads/beamLog (3).out")

    # Insert '_filtered' before the extension. with_name() is used instead of
    # with_stem() because the latter only exists on Python 3.9+.
    input_path = Path(input_file)
    output_file = input_path.with_name(f"{input_path.stem}_filtered{input_path.suffix}")

    filter_log_file(input_file, str(output_file))
def get_fuel_key(row):
    """
    Derive the standardized fuel key from a vehicle data row.

    The key is the lower-cased primary fuel type. For electricity a suffix is appended
    that tells pure electric vehicles from hybrids.

    Args:
        row (pandas.Series): A row from a vehicle types DataFrame
            containing 'primaryFuelType' and 'secondaryFuelType' columns

    Returns:
        str: The lower-cased primary fuel, or 'electricity-only' / 'electricity-hybrid'
    """
    fuel = row['primaryFuelType'].lower()

    if fuel == "electricity":
        # A missing secondary fuel means pure electric; anything else is treated as a hybrid
        suffix = "only" if pd.isna(row['secondaryFuelType']) else "hybrid"
        return f"{fuel}-{suffix}"

    return fuel

def generate_network_name(config: dict) -> str:
    """
    Generate a network name from the study area and the configured graph layers.
    Format: [study_area]-area[-[residential_geo_level][min_density]][-ferry]-network

    The layers are read from config["network"]["graph_layers"] when a "network"
    section exists, otherwise from a top-level config["graph_layers"].

    Example output: sfbay-area-cbg7000-network or sfbay-area-cbg7000-ferry-network
    """
    study_area = config["study_area"]

    # Both layouts occur in this file: nested under "network" and flat at top level.
    layers = config.get("network", config)["graph_layers"]

    # Residential geographic level and density, e.g. "-cbg7000"
    residential_part = ""
    if "residential" in layers:
        residential = layers["residential"]
        # Single quotes inside the f-string keep this valid before Python 3.12.
        residential_part = f"-{residential['geo_level']}{residential['min_density_per_km2']}"

    ferry_suffix = "-ferry" if "ferry" in layers else ""

    return f"{study_area}-area{residential_part}{ferry_suffix}-network"
def create_osm_highway_filter(highway_types):
    """
    Convert a collection of highway types to an OSM custom filter string.

    The types are de-duplicated and sorted, so the same set of types always yields the
    same string no matter how the caller ordered it (callers in this file pass
    ``list(set(...))``, whose order is not stable between runs).

    Args:
        highway_types (Iterable[str] | str): Highway type strings; a single string is
            treated as one type

    Returns:
        str: OSM custom filter string in the format '["highway"~"type1|type2|..."]'

    Raises:
        ValueError: If no highway type is given (an empty pattern would match every way)
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(highway_types, str):
        highway_types = [highway_types]

    unique_types = sorted(set(highway_types))
    if not unique_types:
        raise ValueError("highway_types must contain at least one highway type")

    highway_regex = "|".join(unique_types)
    return f'["highway"~"{highway_regex}"]'


def get_area_config(area_name):
    """
    Retrieve a deep copy of the configuration for the specified area.

    Args:
        area_name (str): The name of the area ('sfbay' or 'seattle')

    Returns:
        dict: A deep copy of the area's configuration

    Raises:
        ValueError: If an invalid area name is provided
    """
    import copy

    # Looked up at call time: both dicts are defined further down in this module.
    area_configs = {
        "sfbay": sfbay_area_config,
        "seattle": seattle_area_config
    }

    if area_name not in area_configs:
        valid_areas = ", ".join(f"'{area}'" for area in area_configs.keys())
        raise ValueError(f"Invalid area name '{area_name}'. Choose from: {valid_areas}")

    # Deep copy so callers can modify their config without changing the module-level dicts
    return copy.deepcopy(area_configs[area_name])

#############################
########## Settings #########
#############################

class BeamClasses:
    """
    BEAM vehicle class definitions.

    Holds the vehicle class name constants together with class methods for grouping
    them (freight vs. passenger) and for turning them into display names.
    """
    # Freight vehicle classes
    CLASS_2B3_VOCATIONAL = 'Class2b3Vocational'
    CLASS_456_VOCATIONAL = 'Class456Vocational'
    CLASS_78_VOCATIONAL = 'Class78Vocational'
    CLASS_78_TRACTOR = 'Class78Tractor'

    # Non-freight vehicle classes
    CLASS_CAR = "Car"  # includes light and medium duty trucks
    CLASS_BIKE = "Bike"
    CLASS_MDP = "MediumDutyPassenger"

    @classmethod
    def get_medium_heavy_freight_classes(cls):
        """Returns the freight classes from Class 4 upwards, i.e. all freight classes except Class 2b/3."""
        return [
            cls.CLASS_456_VOCATIONAL,
            cls.CLASS_78_VOCATIONAL,
            cls.CLASS_78_TRACTOR
        ]

    @classmethod
    def get_freight_classes(cls):
        """Returns a list of all freight vehicle classes."""
        return [
            cls.CLASS_2B3_VOCATIONAL,
            cls.CLASS_456_VOCATIONAL,
            cls.CLASS_78_VOCATIONAL,
            cls.CLASS_78_TRACTOR
        ]

    @classmethod
    def get_passenger_classes(cls):
        """Returns a list of all non-freight vehicle classes."""
        return [
            cls.CLASS_CAR,
            cls.CLASS_BIKE,
            cls.CLASS_MDP
        ]

    @classmethod
    def get_all_classes(cls):
        """Returns a list of all vehicle classes, freight first."""
        return cls.get_freight_classes() + cls.get_passenger_classes()

    @classmethod
    def is_freight(cls, beam_class):
        """Returns True if the given class is a freight vehicle class."""
        return beam_class in cls.get_freight_classes()

    @classmethod
    def class_to_display_name(cls, beam_class):
        """Converts internal class names to display-friendly names; unknown names are returned unchanged."""
        display_names = {
            cls.CLASS_2B3_VOCATIONAL: "Class 2b/3 Vocational",
            cls.CLASS_456_VOCATIONAL: "Class 4-6 Vocational",
            cls.CLASS_78_VOCATIONAL: "Class 7-8 Vocational",
            cls.CLASS_78_TRACTOR: "Class 7-8 Tractor",
            cls.CLASS_CAR: "Passenger Car",
            cls.CLASS_BIKE: "Bicycle",
            cls.CLASS_MDP: "Medium-Duty Passenger"
        }
        return display_names.get(beam_class, beam_class)
physical tank limits) + "meters_per_mile": 1609.34 # Conversion factor from miles to meters +} + +osm_highways = ["motorway", "motorway_link", "trunk", "trunk_link", "primary", "primary_link", "secondary", + "secondary_link", "tertiary", "tertiary_link", "unclassified", "residential"] + +osmnx_settings = { + "log_console": True, + "use_cache": True, + "cache_only_mode": False, + "all_oneway": True, + "requests_timeout": 180, + "overpass_memory": None, + "max_query_area_size": 50 * 1000 * 50 * 1000, # 50km × 50km + "overpass_rate_limit": False, + "overpass_max_attempts": 3, + "useful_tags_way": list(ox.settings.useful_tags_way) + [ + "maxweight", "hgv", "maxweight:hgv", "maxlength", "motorcar", "motor_vehicle", "goods", "truck" + ], + "overpass_url": "https://overpass-api.de/api", + # https://wiki.openstreetmap.org/wiki/Overpass_API#Public_Overpass_API_instances + } + +weight_limits = { + "unit": "lbs", + "mdv_max": 26000, # Upper limit for Medium Duty Vehicles (Class 3-6) in pounds + "hdv_max": 80000, # Upper limit for Heavy Duty Vehicles (Class 7-8) in pounds + } + +fastsim_routee_files = { + "primary_powertrain": { + "md-D-Diesel": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(Diesel,_2020,_no_program).csv", + "md-E-BE": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(BEV,_2025,_no_program).csv", + # "md-E-H2FC": np.nan, + "md-E-PHEV": "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(BEV,_2025,_no_program).csv", + "hdt-D-Diesel": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(Diesel,_2020,_no_program).csv", + "hdt-E-BE": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(BEV,_2025,_no_program).csv", + # "hdt-E-H2FC": np.nan, + "hdt-E-PHEV": "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(BEV,_2025,_no_program).csv", + "hdv-D-Diesel": "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(Diesel,_2020,_no_program).csv", + "hdv-E-BE": 
"Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(BEV,_2025,_no_program).csv", + # "hdv-E-H2FC": np.nan, + "hdv-E-PHEV": "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(BEV,_2025,_no_program).csv" + }, + "secondary_powertrain": { + # "md-D-Diesel": np.nan, + # "md-E-BE": np.nan, + # "md-E-H2FC": np.nan, + "md-E-PHEV": ("Diesel", + 9595.796035186175, + constants["max_fuel_capacity_in_joule"], + "Freight_Baseline_FASTSimData_2020/Class_6_Box_truck_(HEV,_2025,_no_program).csv"), + # "hdt-D-Diesel": np.nan, + # "hdt-E-BE": np.nan, + # "hdt-E-H2FC": np.nan, + "hdt-E-PHEV": ("Diesel", + 13817.086117829229, + constants["max_fuel_capacity_in_joule"], + "Freight_Baseline_FASTSimData_2020/Class_8_Sleeper_cab_high_roof_(HEV,_2025,_no_program).csv"), + # "hdv-D-Diesel": np.nan, + # "hdv-E-BE": np.nan, + # "hdv-E-H2FC": np.nan, + "hdv-E-PHEV": ("Diesel", + 14026.761465378302, + constants["max_fuel_capacity_in_joule"], + "Freight_Baseline_FASTSimData_2020/Class_8_Box_truck_(HEV,_2025,_no_program).csv") + } +} + +########## SF Bay Area ######### + +sfbay_area_config = { + # Base paths + "work_dir": os.path.expanduser("~/Workspace/Simulation/sfbay"), + "study_area": "sfbay", + "state_fips": "06", + # 087 Santa Cruz + # 113 Yolo + "county_fips": ['001', '013', '041', '055', '075', '081', '085', '095', '097'], + "census_year": 2018, + + "geo": { + "utm_epsg": 26910, # NAD83 / UTM zone 10N + "taz_shp": "geo/shp/sfbay-tazs-epsg-26910.shp", + "taz_id": "taz1454", + "cbg_id": "GEOID", + }, + + "network": { + "osmnx_settings": osmnx_settings, + "weight_limits": weight_limits, # Vehicle weight classifications (FHWA) + "download_enabled": True, # if download isn't enabled, we read network from disk + "tolerance": 2, + "graph_layers": { # Density thresholds and corresponding network filters + "main": { + "geo_level": "county", + "custom_filter": create_osm_highway_filter(list(set(osm_highways) - {"residential"})), + "buffer_zone_in_meters": 200 + }, + "residential": { + 
"min_density_per_km2": 5500, + "geo_level": "cbg", + "custom_filter": create_osm_highway_filter(osm_highways), + "buffer_zone_in_meters": 20 + } + # // California has a higher urbanization rate (94.8% urban vs 80.7% national average) + # // https://dof.ca.gov/wp-content/uploads/sites/352/Forecasting/Demographics/Documents/Urban-Rural_Classification_and_2020_Urban_Area_Criteria_CA_SDC.pdf + # const avgPersonsPerHousehold = 2.9; // CA average household size (higher than national 2.5) + # + # // Core density calculation (using similar proportions as national but adjusted for CA household size) + # const coreHUDensity = 1275; // National high-density nucleus requirement + # const caDensityAdjustment = 2.9 / 2.5; // CA vs national household size ratio + # // Calculate CA-adjusted thresholds + # const caHighDensityPPSM = coreHUDensity * 2.9; + # const caInitialCorePPSM = 425 * 2.9; + # const caUrbanExtensionPPSM = 200 * 2.9; + # // Result + # // California-adjusted density thresholds (persons per square mile): + # // densest urban cores, typical of downtown areas in major California cities: 7,395 ppsm = 2,855 ppsk + # // High-density nucleus requirement: 3698 ppsm = 1429 ppsk + # // Initial core requirement: 1233 ppsm = 475 ppsk + # // Urban extension requirement: 580 ppsm = 224 ppsk + # // Rural Areas less than 580 people per square mile + }, + "validation": { + "npmrds": { + "year": 2018, + "geo": "validation/npmrds/California.shp", + "data": "validation/npmrds/al_ca_oct2018_1hr_trucks_pax.csv" + } + } + }, + + # FastSim routee files + "fastsim_routee_files": fastsim_routee_files, + + "freight": { + "stops_data": "data/austin_cargo_operations.csv", + "2018_Baseline" : { + "carriers_file": f"beam-ft/2024-11-06/2018-Baseline/carriers--2018-Baseline.csv", + "payloads_file": f"beam-ft/2024-11-06/2018-Baseline/payloads--2018-Baseline.csv", + "tours_file": f"beam-ft/2024-11-06/2018-Baseline/tours--2018-Baseline.csv", + "ft_vehicle_types_file": 
f"vehicle-tech/ft-vehicletypes--20241106--2018-Baseline.csv" + } + }, + + "emissions": { + "2018-Baseline" : { + "override_rates": False, + "override_fleet": True, + "rates": { + "output_dir": "emissions/20240123", + "filters": { + "season_month": "Annual", + "calendar_year": 2018, + "temperature": 60., + "relative_humidity": 40., + "sub_area": ["SF"], + "include_nan": True + }, + "emfac": { + "emfac_rates_by_model_year_file": f"emissions/rates/emfac/imputed_MTC_emission_rate_agg_NH3_added_2018_2025_2030_2040_2050.csv", + "emfac_vmt_by_model_year_file": f"emissions/rates/emfac/Default_Statewide_2018_2025_2030_2040_2050_Annual_vmt_20240612233346.csv", + "emfac_pop_by_model_year_file": f"emissions/rates/emfac/Default_Statewide_2018_2025_2030_2040_2050_Annual_population_20240612233346.csv" + }, + "black_carbon": { + "black_carbon_rates_file": f"emissions/rates/black_carbon/emfac_bc_rate_three_ver_2018.csv", + }, + "road_dust": { + "rainy_days_file": f"emissions/rates/road_dust/CA_input/rainy_days.csv", + "silt_loading_file": f"emissions/rates/road_dust/CA_input/silt_loading.csv", + } + }, + "beam" : { + "carriers_file": f"beam-ft/20240123/2018-Baseline/carriers--2018-Baseline.csv", + "payloads_file": f"beam-ft/20240123/2018-Baseline/payloads--2018-Baseline.csv", + "ft_vehicle_types_file": f"vehicle-tech/vehicleTypes--frism--2018-Baseline.csv", + "pax_vehicles_file": f"beam-pax/2023-Baseline/vehicles--atlas--2023-Baseline.csv.gz", + "pax_vehicle_types_file": f"vehicle-tech/vehicleTypes--atlas--2023-Baseline.csv" + }, + "mapping": { + "fleet": { + "ignore_beam_passenger_distribution": False, + "ignore_beam_freight_distribution": False + }, + "atlas":{ + "enable_atlas_emfac_crosswalk": True, + "emfac": f"atlas/atlas-emfac-xwalk.csv", + "routee": f"atlas/vehicle_type_mapping_baseline.csv", + "alternatives": { + "car": ['car'], + "suv": ['suv', 'car', 'truck'], + 'truck': ['truck', 'suv', 'minvan'], + 'van': ['minvan', 'truck'], + 'minvan': ['minvan', 'truck', 'van'] + } + 
}, + "fuel": { + "beam": { + "hydrogen": 'Elec', # From emission pov, BEAM's hydrogen cars shall be electric + "electricity-only": 'Elec', + "electricity-hybrid": 'Phe', + "gasoline": 'Gas', + "diesel": 'Dsl', + "biodiesel": 'Dsl' # From emission pov, BEAM's biodiesel cars shall be diesel + }, + "emfac-ft": { + "Elec": 'Elec', + "Phe": 'Phe', + "Gas": 'Dsl', + "Dsl": 'Dsl', + "NG": 'Dsl' # EMFAC NG cars will be mapped to BEAM's diesel cars + }, + "emfac-pax": { + "Elec": 'Elec', + "Phe": 'Phe', + "Gas": 'Gas', + "Dsl": 'Gas', + "NG": 'Gas' # EMFAC NG cars will be mapped to BEAM's diesel cars + }, + "emfac-bus": { + "Elec": 'Elec', + "Phe": 'Phe', + "Gas": 'Gas', + "Dsl": 'Dsl', + "NG": 'Dsl' + }, + "alternatives": { + "Elec": ['Elec', 'Phe'], + 'Phe': ['Phe', 'Elec'], + "Gas": ['Gas', 'Dsl'], + "Dsl": ['Dsl', 'Gas'] + } + }, + "class": { + "emfac-ft": { + "T6 CAIRP Class 4": "Class456Vocational", + "T6 CAIRP Class 5": "Class456Vocational", + "T6 CAIRP Class 6": "Class456Vocational", + "T6 CAIRP Class 7": "Class78Tractor", + "T6 Instate Delivery Class 4": "Class456Vocational", + "T6 Instate Delivery Class 5": "Class456Vocational", + "T6 Instate Delivery Class 6": "Class456Vocational", + "T6 Instate Delivery Class 7": "Class78Vocational", + "T6 Instate Other Class 4": "Class456Vocational", + "T6 Instate Other Class 5": "Class456Vocational", + "T6 Instate Other Class 6": "Class456Vocational", + "T6 Instate Other Class 7": "Class78Vocational", + "T6 Instate Tractor Class 6": "Class456Vocational", + "T6 Instate Tractor Class 7": "Class78Tractor", + "T6 OOS Class 4": "Class456Vocational", + "T6 OOS Class 5": "Class456Vocational", + "T6 OOS Class 6": "Class456Vocational", + "T6 OOS Class 7": "Class78Vocational", + "T7 CAIRP Class 8": "Class78Tractor", + "T7 NNOOS Class 8": "Class78Vocational", + "T7 NOOS Class 8": "Class78Vocational", + "T7 Single Concrete/Transit Mix Class 8": "Class78Vocational", + "T7 Single Dump Class 8": "Class78Vocational", + "T7 Single Other Class 
8": "Class78Vocational", + "T7 Tractor Class 8": "Class78Tractor", + "T7IS": "Class78Tractor" + }, + "emfac-pax": { + "LDA": "Car", + "LDT1": "Car", + "LDT2": "Car", + "MCY": "Bike", + "MDV": "Car" + }, + "emfac-bus": { + "UBUS": "MediumDutyPassenger" + }, + "alternatives": { + "Class456Vocational": ['Class456Vocational', 'Class78Vocational'], + 'Class78Vocational': ['Class78Vocational', 'Class456Vocational', 'Class78Tractor'], + "Class78Tractor": ['Class78Tractor', 'Class78Vocational'], + "Car": ['Car'], + "Bike": ['Bike'], + "MediumDutyPassenger": ['MediumDutyPassenger'] + } + } + } + } + } +} + +########## Seattle Area ######### + +seattle_area_config = { + # OSMNX settings + "osmnx_settings": osmnx_settings, + + # Vehicle weight classifications (FHWA) + "weight_limits": weight_limits, + + # FastSim routee files + "fastsim_routee_files": fastsim_routee_files, + + # if download isn't enabled, we read network from disk + "download_enabled": True, + + # Base paths + "work_dir": os.path.expanduser("~/Workspace/Simulation/seattle"), + + # Geographic settings + "study_area": "seattle", + "state_fips": "53", + "county_fips": ["061", "033", "035", "053"], # ["061", "033", "035", "053"] + "census_year": 2018, + "utm_epsg": 32048, # + "tolerance": 2, + + # Density thresholds and corresponding network filters + "graph_layers": { + "main": { + "geo_level": "county", + "custom_filter": create_osm_highway_filter(list(set(osm_highways) - {"residential"})), + "buffer_zone_in_meters": 200 + }, + "ferry": { + "geo_level": "county", + "custom_filter": '["route"="ferry"]', + "buffer_zone_in_meters": 10000 + }, + "residential": { + "min_density_per_km2": 0, + "geo_level": "cbg", + "custom_filter": create_osm_highway_filter(osm_highways), + "buffer_zone_in_meters": 20 + } + # // Washington has a moderate urbanization rate (84.1% urban vs 80.7% national average) + # // https://www.census.gov/quickfacts/fact/table/WA/INC110223 + # // Washington's urbanization rate is higher than the 
national average but lower than California's 94.8% + # const avgPersonsPerHousehold = 2.51; // WA average household size (slightly higher than national 2.5) + + # // Core density calculation (using similar proportions as national but adjusted for WA household size) + # const coreHUDensity = 1275; // National high-density nucleus requirement + # const waDensityAdjustment = 2.51 / 2.5; // WA vs national household size ratio + # // Calculate WA-adjusted thresholds + # const waHighDensityPPSM = coreHUDensity * 2.51; + # const waInitialCorePPSM = 425 * 2.51; + # const waUrbanExtensionPPSM = 200 * 2.51; + + # // Washington-adjusted density thresholds (persons per square mile): + # // densest urban cores, typical of downtown areas in major Washington cities: 3200 ppsm = 1236 ppsk + # // High-density nucleus requirement: 3200 ppsm = 1236 ppsk + # // Initial core requirement: 1067 ppsm = 412 ppsk + # // Urban extension requirement: 502 ppsm = 194 ppsk + # // Rural Areas less than 502 people per square mile + } +} \ No newline at end of file diff --git a/src/main/resources/beam-template.conf b/src/main/resources/beam-template.conf index 09847bf8ff9..6dafdc61617 100755 --- a/src/main/resources/beam-template.conf +++ b/src/main/resources/beam-template.conf @@ -30,6 +30,7 @@ beam.agentsim.timeBinSize = "int | 3600" beam.agentsim.firstIteration = "int | 0" beam.agentsim.lastIteration = "int | 0" beam.agentsim.endTime = "30:00:00" +beam.agentsim.lastTransitTrip = "28:00:00" beam.agentsim.scheduleMonitorTask.initialDelay = 1 beam.agentsim.scheduleMonitorTask.interval = 30 beam.agentsim.snapLocationAndRemoveInvalidInputs = "boolean | false" @@ -106,6 +107,8 @@ beam.agentsim.agents.modalBehaviors.lowTimeSensitivity.highCongestion.highwayFac beam.agentsim.agents.modalBehaviors.lowTimeSensitivity.highCongestion.nonHighwayFactor.LevelLE2 = "double | 1.0" beam.agentsim.agents.modalBehaviors.lowTimeSensitivity.lowCongestion.highwayFactor.LevelLE2 = "double | 1.0" 
beam.agentsim.agents.modalBehaviors.lowTimeSensitivity.lowCongestion.nonHighwayFactor.LevelLE2 = "double | 1.0" +beam.agentsim.agents.modalBehaviors.multinomialLogit.units = "dollars" +beam.agentsim.agents.modalBehaviors.multinomialLogit.params.time = "double | 0.022" beam.agentsim.agents.modalBehaviors.multinomialLogit.params.transfer = "double | -1.4" beam.agentsim.agents.modalBehaviors.multinomialLogit.params.transit_crowding = "double | 0.0" beam.agentsim.agents.modalBehaviors.multinomialLogit.params.transit_crowding_percentile = "double | 90.0" @@ -485,6 +488,11 @@ beam.exchange { urbansim.scenarioLoadingTimeoutSeconds = "int | 3000" } output { + activitySimSkimsEnabled = "boolean | false" + sendNonChosenTripsToSkimmer = "boolean | true" + generateSkimsForAllModes = "boolean | false" + generateSkimsForRideHailTransit = "boolean | false" + # geo level different than TAZ (in beam taz-centers format) emissions { # This is the list of pollutants to filter out among # "CH4", "CO", "CO2", "HC", "NH3", "NOx", "PM", "PM10", "PM2_5", "ROG", "SOx", "TOG" @@ -500,6 +508,7 @@ beam.exchange { primary.enabled = "boolean | false" secondary.enabled = "boolean | false" # To filter the modes in the mapped skim using beam mode schema + #@optional secondary.beamModeFilter = [ "car", "bike", @@ -512,8 +521,8 @@ beam.exchange { "walk_transit", ] # geo level different than beam.agentsim.taz format - secondary.taz.filePath = string - secondary.taz.tazIdFieldName = string + secondary.taz.filePath = "String | ''" + secondary.taz.tazIdFieldName = "String | ''" #@optional secondary.taz.tazMapping { # To map this taz level with beam.agentsim.taz using two columns CSV file @@ -643,28 +652,28 @@ beam.physsim.network.overwriteRoadTypeProperties { alpha = "double?" beta = "double?" } - primary { + trunk { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" beta = "double?" } - primaryLink { + trunkLink { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" 
beta = "double?" } - trunk { + primary { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" beta = "double?" } - trunkLink { + primaryLink { speed = "double?" capacity = "int?" lanes = "int?" @@ -699,28 +708,28 @@ beam.physsim.network.overwriteRoadTypeProperties { alpha = "double?" beta = "double?" } - minor { + unclassified { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" beta = "double?" } - residential { + minor { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" beta = "double?" } - livingStreet { + residential { speed = "double?" capacity = "int?" lanes = "int?" alpha = "double?" beta = "double?" } - unclassified { + livingStreet { speed = "double?" capacity = "int?" lanes = "int?" @@ -756,6 +765,7 @@ beam.replanning.ModuleProbability_3 = 0.1 beam.replanning.Module_4 = "TimeMutator" beam.replanning.ModuleProbability_4 = 0.0 beam.replanning.fractionOfIterationsToDisableInnovation = "double | Double.PositiveInfinity" +beam.replanning.subtractExpectedScores = "boolean | true" #@optional beam.replanning.clearModes.modes = [string] | [] beam.replanning.clearModes.iteration = "int | 0" @@ -821,7 +831,7 @@ beam.agentsim.h3taz = { # * linkStatsFromLastRun (only link stats is loaded from beam.input.lastBaseOutputDir directory) beam.warmStart.type = "disabled" beam.warmStart.samplePopulationIntegerFlag = 0 # Int chosen instead of Boolean, as passthrough coverts Boolean to Int -#PATH can be a directory or zip archive of the output directory (e.g. like what get's stored on S3), including a URL to an S3 output. +#PATH can be a directory or zip archive of the output directory (e.g. like what get's stored on S3), including a URL to an S3 output../gradlew -Ptag=beammodel/beam:0.9.0.0 buildImage beam.warmStart.path = "" # If the full warmstart directory isn't being used, it is possible to only load in a linkStats file instead to pre- # populate the link travel times. 
Add path to linkStats file here @@ -1055,6 +1065,7 @@ beam.routing { # if the route has a different access mode to the fastest, the actual amount of minutes used to decide # if it will be kept is 5 times this parameter. suboptimalMinutes = "int | 10" + suboptimalMinutesForDriveAccess = "int | 2" # HOW LONG DOES IT TAKE YOU TO PARK YOUR VEHICLE AT THE STATION accessBufferTimeSeconds { bike = "int | 60" diff --git a/src/main/scala/beam/agentsim/agents/PersonAgent.scala b/src/main/scala/beam/agentsim/agents/PersonAgent.scala index fbb500e7abf..ed1e0f7e6bf 100644 --- a/src/main/scala/beam/agentsim/agents/PersonAgent.scala +++ b/src/main/scala/beam/agentsim/agents/PersonAgent.scala @@ -53,6 +53,7 @@ import beam.sim.common.GeoUtils import beam.sim.config.BeamConfig.Beam.Debug import beam.sim.population.AttributesOfIndividual import beam.sim.{BeamScenario, BeamServices, Geofence} +import beam.utils.DateUtils.getLastTransitTripTime import beam.utils.MeasureUnitConversion._ import beam.utils.NetworkHelper import beam.utils.logging.ExponentialLazyLogging @@ -223,7 +224,7 @@ object PersonAgent { restOfCurrentTrip.headOption.exists(_.isRideHail) && !rideHailReservedForLegs.contains(restOfCurrentTrip.head) } - def currentTourModeIsIn(modes: BeamMode*): Boolean = currentTourMode.exists(modes.contains) + def currentTripModeIsIn(modes: BeamMode*): Boolean = currentTripMode.exists(modes.contains) override def withPassengerSchedule(newPassengerSchedule: PassengerSchedule): DrivingData = copy(passengerSchedule = newPassengerSchedule) @@ -673,7 +674,8 @@ class PersonAgent( currentTourModeChoiceStrategy.tourVehicle.orElse(data.currentTourPersonalVehicle), numberOfReplanningAttempts = 0, failedTrips = IndexedSeq.empty, - enrouteData = EnrouteData() + enrouteData = EnrouteData(), + passengerSchedule = PassengerSchedule() ), SpaceTime(currentCoord, _currentTick.get), excludeModes = @@ -794,24 +796,35 @@ class PersonAgent( serviceName = response.rideHailManagerName ) ) - val currentCoord = 
beamServices.geo.wgs2Utm(data.restOfCurrentTrip.head.beamLeg.travelPath.startPoint).loc eventsManager.processEvent( new ReplanningEvent( tick, Id.createPersonId(id), replanningReason, - currentCoord.getX, - currentCoord.getY + data.restOfCurrentTrip.head.beamLeg.travelPath.startPoint.loc.getX, + data.restOfCurrentTrip.head.beamLeg.travelPath.startPoint.loc.getY ) ) + + val currentCoord = beamServices.geo.wgs2Utm(data.restOfCurrentTrip.head.beamLeg.travelPath.startPoint).loc val nextCoord = nextActivity(data).get.getCoord goto(ChoosingMode) using ChoosesModeData( - data.copy(currentTripMode = None, numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1), + data.copy( + currentTripMode = None, + currentTrip = None, + restOfCurrentTrip = List.empty[EmbodiedBeamLeg], + numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1, + passengerSchedule = PassengerSchedule() + ), currentLocation = SpaceTime( currentCoord, tick ), + pendingChosenTrip = None, + rideHail2TransitRoutingResponse = None, + rideHail2TransitAccessResult = None, + rideHail2TransitEgressResult = None, isWithinTripReplanning = true, excludeModes = (if (data.numberOfReplanningAttempts > 0) Set(RIDE_HAIL, RIDE_HAIL_POOLED, RIDE_HAIL_TRANSIT) else Set()) ++ (if (canUseCars(currentCoord, nextCoord)) Set.empty[BeamMode] @@ -832,25 +845,51 @@ class PersonAgent( val currentCoord = beamServices.geo.wgs2Utm(data.nextLeg.beamLeg.travelPath.startPoint).loc val nextCoord = nextActivity(data).get.getCoord + val nextCoordWgs = beamServices.geo.utm2Wgs(nextCoord) val replanningReason = getReplanningReasonFrom(data, firstErrorResponse.errorCode.entryName) eventsManager.processEvent( new ReplanningEvent( _currentTick.get, Id.createPersonId(id), replanningReason, - currentCoord.getX, - currentCoord.getY, - nextCoord.getX, - nextCoord.getY + data.nextLeg.beamLeg.travelPath.startPoint.loc.getX, + data.nextLeg.beamLeg.travelPath.startPoint.loc.getY, + nextCoordWgs.getX, + nextCoordWgs.getY ) ) + + val 
(replannedMode, excludedMode) = + data.currentTripMode match { // Keep drive transit if we're picking up the vehicle + case Some(mode @ (BIKE_TRANSIT | DRIVE_TRANSIT)) if isLastTripWithinTour(nextActivity(data).get) => + (mode, None) + case Some(mode @ (BIKE_TRANSIT | RIDE_HAIL_TRANSIT | DRIVE_TRANSIT)) => + (WALK_TRANSIT, Some(mode)) + case _ => (WALK_TRANSIT, None) + } + goto(ChoosingMode) using ChoosesModeData( - data.copy(numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1), + data.copy( + numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1, + currentTrip = None, + currentTripMode = Some(replannedMode), + restOfCurrentTrip = List.empty[EmbodiedBeamLeg], + passengerSchedule = PassengerSchedule(), + failedTrips = data.failedTrips ++ data.currentTrip.map(trip => + trip.copy(legs = trip.legs.filter(_.beamLeg.startTime > _currentTick.getOrElse(-1))) + ) + ), currentLocation = SpaceTime(currentCoord, _currentTick.get), + pendingChosenTrip = None, + rideHail2TransitRoutingResponse = None, + rideHail2TransitAccessResult = None, + rideHail2TransitEgressResult = None, isWithinTripReplanning = true, - excludeModes = + excludeModes = excludedMode.toSet ++ ( if (canUseCars(currentCoord, nextCoord)) Set.empty else Set(BeamMode.RIDE_HAIL, BeamMode.CAR, BeamMode.CAV) + ), + mostRecentDeniedBoardingLeg = Some(data.nextLeg) ) } @@ -1038,7 +1077,11 @@ class PersonAgent( if (currentBeamVehicle.beamVehicleType.vehicleCategory != Bike) { if (currentBeamVehicle.stall.isEmpty) logWarn("Expected currentBeamVehicle.stall to be defined.") } - if (currentBeamVehicle.isSharedVehicle || BeamVehicle.isSharedTeleportationVehicle(currentBeamVehicle.id)) { + if ( + (currentBeamVehicle.isSharedVehicle && !BeamVehicle.isEmergencyVehicle( + currentBeamVehicle.id + )) || BeamVehicle.isSharedTeleportationVehicle(currentBeamVehicle.id) + ) { // Is a shared vehicle. Give it up. currentBeamVehicle.getManager.get ! 
ReleaseVehicle(currentBeamVehicle, triggerId) beamVehicles -= data.currentVehicle.head @@ -1090,30 +1133,41 @@ class PersonAgent( _currentTick.get, Id.createPersonId(id), replanningReason, - currentCoord.getX, - currentCoord.getY + basePersonData.restOfCurrentTrip.head.beamLeg.travelPath.startPoint.loc.getX, + basePersonData.restOfCurrentTrip.head.beamLeg.travelPath.startPoint.loc.getY ) ) val nextAct = nextActivity(basePersonData).get val nextCoord = nextAct.getCoord + // Change -- just switch back to walk_transit // Have to give up my mode as well, perhaps there's no option left for driving. - _experiencedBeamPlan.putStrategy(nextAct, TripModeChoiceStrategy(mode = None)) - val (updatedTourMode, updatedTourPersonalVehicle): (Option[BeamTourMode], Option[Id[BeamVehicle]]) = - if (nextAct.getType.equalsIgnoreCase("Home")) { (None, None) } - else { (basePersonData.currentTourMode, basePersonData.currentTourPersonalVehicle) } +// _experiencedBeamPlan.putStrategy(nextAct, TripModeChoiceStrategy(mode = None)) +// val (updatedTourMode, updatedTourPersonalVehicle): (Option[BeamTourMode], Option[Id[BeamVehicle]]) = +// if (nextAct.getType.equalsIgnoreCase("Home")) { (None, None) } +// else { (basePersonData.currentTourMode, basePersonData.currentTourPersonalVehicle) } goto(ChoosingMode) using ChoosesModeData( basePersonData.copy( - currentTripMode = None, - currentTourMode = updatedTourMode, + currentTripMode = Some(WALK_TRANSIT), + currentTrip = None, + restOfCurrentTrip = List.empty[EmbodiedBeamLeg], currentTourPersonalVehicle = updatedTourPersonalVehicle, - numberOfReplanningAttempts = basePersonData.numberOfReplanningAttempts + 1 + numberOfReplanningAttempts = basePersonData.numberOfReplanningAttempts + 1, + passengerSchedule = PassengerSchedule(), + failedTrips = basePersonData.failedTrips ++ basePersonData.currentTrip.map(trip => + trip.copy(legs = trip.legs.filter(_.beamLeg.startTime > _currentTick.getOrElse(-1))) + ) ), SpaceTime(currentCoord, _currentTick.get), 
isWithinTripReplanning = true, + pendingChosenTrip = None, + rideHail2TransitRoutingResponse = None, + rideHail2TransitAccessResult = None, + rideHail2TransitEgressResult = None, excludeModes = if (canUseCars(currentCoord, nextCoord)) Set.empty - else Set(BeamMode.RIDE_HAIL, BeamMode.CAR, BeamMode.CAV) + else Set(BeamMode.RIDE_HAIL, BeamMode.CAR, BeamMode.CAV), + mostRecentDeniedBoardingLeg = basePersonData.restOfCurrentTrip.headOption ) } @@ -1267,7 +1321,10 @@ class PersonAgent( // TRANSIT but too late case Event(StateTimeout, data: BasePersonData) if data.hasNextLeg && data.nextLeg.beamLeg.mode.isTransit && - data.nextLeg.beamLeg.startTime < _currentTick.get => + ((data.nextLeg.beamLeg.startTime < _currentTick.get) || (_currentTick.get > getLastTransitTripTime( + beamServices.beamConfig + ))) => + val nextAct = nextActivity(data) // We've missed the bus. This occurs when something takes longer than planned (based on the // initial inquiry). So we replan but change trip mode to WALK_TRANSIT since we've already done our non-transit // portion. 
@@ -1285,20 +1342,37 @@ class PersonAgent( _currentTick.get, Id.createPersonId(id), replanningReason, - currentCoord.getX, - currentCoord.getY + data.nextLeg.beamLeg.travelPath.startPoint.loc.getX, + data.nextLeg.beamLeg.travelPath.startPoint.loc.getY ) ) - val nextCoord = nextActivity(data).get.getCoord + val (replannedMode, excludedMode) = + data.currentTripMode match { // Keep drive transit if we're picking up the vehicle + case Some(mode @ (BIKE_TRANSIT | DRIVE_TRANSIT)) if isLastTripWithinTour(nextAct.get) => + (mode, None) + case Some(mode @ (BIKE_TRANSIT | RIDE_HAIL_TRANSIT | DRIVE_TRANSIT)) => + (WALK_TRANSIT, Some(mode)) + case _ => (WALK_TRANSIT, None) + } + + val nextCoord = nextAct.get.getCoord goto(ChoosingMode) using ChoosesModeData( personData = data - .copy(currentTripMode = Some(WALK_TRANSIT), numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1), + .copy( + currentTripMode = Some(replannedMode), + numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1, + passengerSchedule = PassengerSchedule(), + failedTrips = data.failedTrips ++ data.currentTrip.toVector + ), currentLocation = SpaceTime(currentCoord, _currentTick.get), + pendingChosenTrip = None, + rideHail2TransitRoutingResponse = None, + rideHail2TransitAccessResult = None, + rideHail2TransitEgressResult = None, isWithinTripReplanning = true, - excludeModes = - if (canUseCars(currentCoord, nextCoord)) Set.empty - else Set(BeamMode.RIDE_HAIL, BeamMode.CAR, BeamMode.CAV) + excludeModes = excludedMode.toSet ++ (if (canUseCars(currentCoord, nextCoord)) Set.empty + else Set(BeamMode.RIDE_HAIL, BeamMode.CAR, BeamMode.CAV)) ) // TRANSIT case Event(StateTimeout, data: BasePersonData) if data.hasNextLeg && data.nextLeg.beamLeg.mode.isTransit => @@ -1332,16 +1406,24 @@ class PersonAgent( _currentTick.get, Id.createPersonId(id), replanningReason, - currentCoord.getX, - currentCoord.getY + data.nextLeg.beamLeg.travelPath.startPoint.loc.getX, + 
data.nextLeg.beamLeg.travelPath.startPoint.loc.getY ) ) val nextCoord = nextActivity(data).get.getCoord goto(ChoosingMode) using ChoosesModeData( personData = data - .copy(currentTripMode = Some(WALK_TRANSIT), numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1), + .copy( + currentTripMode = None, + numberOfReplanningAttempts = data.numberOfReplanningAttempts + 1, + passengerSchedule = PassengerSchedule() + ), currentLocation = SpaceTime(currentCoord, _currentTick.get), + pendingChosenTrip = None, + rideHail2TransitRoutingResponse = None, + rideHail2TransitAccessResult = None, + rideHail2TransitEgressResult = None, isWithinTripReplanning = true, excludeModes = if (canUseCars(currentCoord, nextCoord)) Set.empty @@ -1365,7 +1447,7 @@ class PersonAgent( case Event( StateTimeout, data: BasePersonData - ) if data.currentTourModeIsIn(HOV2_TELEPORTATION, HOV3_TELEPORTATION) => + ) if data.currentTripModeIsIn(HOV2_TELEPORTATION, HOV3_TELEPORTATION) => nextActivity(data) match { case Some(activity) => val (tick, triggerId) = releaseTickAndTriggerId() @@ -1497,7 +1579,7 @@ class PersonAgent( if (activityEndTime > tick + beamServices.beamConfig.beam.agentsim.schedulerParallelismWindow) { activityEndTime.toInt } else { - logger.warn( + logger.debug( "Moving back next activity end time from {} to {} to avoid parallelism issues, currently on trip {}", activityEndTime, tick + beamServices.beamConfig.beam.agentsim.schedulerParallelismWindow, @@ -1546,6 +1628,7 @@ class PersonAgent( } case Some(personalVehId) => logger.error(s"Vehicle ${personalVehId.toString} seems to have disappeared") + logger.warn("Events leading up to this point:\n\t" + getLog.mkString("\n\t")) None case None => None @@ -1795,7 +1878,9 @@ class PersonAgent( protected def getCurrentTourStrategy( data: BasePersonData ): TourModeChoiceStrategy = { - _experiencedBeamPlan.getTourStrategy[TourModeChoiceStrategy](currentActivity(data)) + nextActivity(data) + 
.map(_experiencedBeamPlan.getTourStrategy[TourModeChoiceStrategy](_)) + .getOrElse(TourModeChoiceStrategy()) } private def handleSuccessfulTransitReservation( diff --git a/src/main/scala/beam/agentsim/agents/Population.scala b/src/main/scala/beam/agentsim/agents/Population.scala index 75354f8c715..7cabe26d40d 100755 --- a/src/main/scala/beam/agentsim/agents/Population.scala +++ b/src/main/scala/beam/agentsim/agents/Population.scala @@ -138,8 +138,10 @@ class Population( .collectionAsScalaIterable(household.getVehicleIds) .map { vid => val bv = beamScenario.privateVehicles(BeamVehicle.createId(vid)) + val managerType = + if (bv.isFreightVehicle) VehicleManager.TypeEnum.Freight else VehicleManager.TypeEnum.Household val reservedFor = - VehicleManager.createOrGetReservedFor(household.getId.toString, VehicleManager.TypeEnum.Household) + VehicleManager.createOrGetReservedFor(household.getId.toString, managerType) bv.vehicleManagerId.set(reservedFor.managerId) bv.id -> bv } diff --git a/src/main/scala/beam/agentsim/agents/choice/logit/MultinomialLogit.scala b/src/main/scala/beam/agentsim/agents/choice/logit/MultinomialLogit.scala index 7616a8d6efe..e5e9ac6fa06 100644 --- a/src/main/scala/beam/agentsim/agents/choice/logit/MultinomialLogit.scala +++ b/src/main/scala/beam/agentsim/agents/choice/logit/MultinomialLogit.scala @@ -44,8 +44,10 @@ class MultinomialLogit[A, T]( } def calcAlternativesWithUtility( - alternatives: Map[A, Map[T, Double]] + alternatives: Map[A, Map[T, Double]], + override_scale_factor_option: Option[Double] = None ): Iterable[AlternativeWithUtility[A]] = { + val override_scale_factor = override_scale_factor_option.getOrElse(scale_factor) // evaluate utility of alternatives val altsWithUtility: Iterable[AlternativeWithUtility[A]] = alternatives.foldLeft(List.empty[AlternativeWithUtility[A]]) { case (accumulator, (alt, attributes)) => @@ -56,8 +58,8 @@ class MultinomialLogit[A, T]( // place on tail of list, allowing us to short-circuit the sampling in 
next step accumulator :+ AlternativeWithUtility( alt, - thisUtility * scale_factor, - math.exp(thisUtility * scale_factor) + thisUtility * override_scale_factor, + math.exp(thisUtility * override_scale_factor) ) } else if (thisUtility.isNegInfinity) { // utility of negative infinity means that an alternative isn't feasible, so we filter it out @@ -65,8 +67,8 @@ class MultinomialLogit[A, T]( } else { AlternativeWithUtility( alt, - thisUtility * scale_factor, - math.exp(thisUtility * scale_factor) + thisUtility * override_scale_factor, + math.exp(thisUtility * override_scale_factor) ) +: accumulator } } @@ -126,18 +128,20 @@ class MultinomialLogit[A, T]( * @return */ def getExpectedMaximumUtility( - alternatives: Map[A, Map[T, Double]] + alternatives: Map[A, Map[T, Double]], + override_scale_factor_option: Option[Double] = None ): Option[Double] = { + val override_scale_factor = override_scale_factor_option.getOrElse(scale_factor) val scaledUtilityOfAlternatives: Iterable[Double] = for { (alt, attributes) <- alternatives utility <- getUtilityOfAlternative(alt, attributes) } yield { - utility * scale_factor + utility * override_scale_factor } if (scaledUtilityOfAlternatives.isEmpty) None - else Some { MathUtils.logSumExp(scaledUtilityOfAlternatives) / scale_factor } + else Some { MathUtils.logSumExp(scaledUtilityOfAlternatives) / override_scale_factor } } /** diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceDriveIfAvailable.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceDriveIfAvailable.scala index 46ea6d4a90c..75614a8bff6 100755 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceDriveIfAvailable.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceDriveIfAvailable.scala @@ -53,9 +53,10 @@ class ModeChoiceDriveIfAvailable(val beamServices: BeamServices) extends ModeCho ) = 0.0 override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, 
Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = 0.0 } diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceLCCM.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceLCCM.scala index 697f466a79d..6fa699ce539 100644 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceLCCM.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceLCCM.scala @@ -290,9 +290,10 @@ class ModeChoiceLCCM( ): Double = 0.0 override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = { // Compute and log all-day score w.r.t. all modality styles // One of them has many suspicious-looking 0.0 values. Probably something which @@ -305,7 +306,7 @@ class ModeChoiceLCCM( } .toMap .mapValues(modeChoiceCalculatorForStyle => - trips.map(trip => modeChoiceCalculatorForStyle.utilityOf(trip, attributesOfIndividual, None, None)).sum + trips.keys.map(trip => modeChoiceCalculatorForStyle.utilityOf(trip, attributesOfIndividual, None, None)).sum ) .toArray .toMap // to force computation DO NOT TOUCH IT, because here is call-by-name and it's lazy which will hold a lot of memory !!! 
:) diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceMultinomialLogit.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceMultinomialLogit.scala index eda7e4af9fa..f9cf6b56d38 100755 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceMultinomialLogit.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceMultinomialLogit.scala @@ -79,7 +79,15 @@ class ModeChoiceMultinomialLogit( (mct.embodiedBeamTrip, theParams ++ transferParam) }.toMap - val alternativesWithUtility = model.calcAlternativesWithUtility(inputData) + val scaleFactor = beamConfig.beam.agentsim.agents.modalBehaviors.multinomialLogit.units.toLowerCase match { + case "dollars" => None + case "utils" => + Some( + beamConfig.beam.agentsim.agents.modalBehaviors.multinomialLogit.params.time / attributesOfIndividual.valueOfTime * 60.0 + ) + } + + val alternativesWithUtility = model.calcAlternativesWithUtility(inputData, scaleFactor) val chosenModeOpt = model.sampleAlternative(alternativesWithUtility, random) expectedMaximumUtility = model.getExpectedMaximumUtility(inputData).getOrElse(0) @@ -585,7 +593,7 @@ class ModeChoiceMultinomialLogit( numTransfers: Int = 0, transitOccupancyLevel: Double ): Double = { - modeModel.getUtilityOfAlternative(mode, attributes(cost, transitOccupancyLevel, numTransfers)).getOrElse(0) + modeModel.getUtilityOfAlternative(mode, attributes(cost + time, transitOccupancyLevel, numTransfers)).getOrElse(0) } private def attributes(cost: Double, transitOccupancyLevel: Double, numTransfers: Int) = { @@ -597,11 +605,33 @@ class ModeChoiceMultinomialLogit( } override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = - trips.map(trip => utilityOf(trip, attributesOfIndividual, None, None)).sum // TODO: 
Update with destination activity + trips.map { case (trip, mods) => + val modeChoiceData = altsToModeCostTimeTransfers( + IndexedSeq(trip), + attributesOfIndividual, + None, + None + ).head + + val newScaledTime = if (overrideAttributes) { + modeChoiceData.scaledTime / mods.getOrElse("travelTimeRatio", 1.0) + } else { modeChoiceData.scaledTime } + + // Placeholder to subtract off other components (e.g. cost) if they differ from expected values + + utilityOf( + trip.tripClassifier, + modeChoiceData.cost, + newScaledTime, + modeChoiceData.numTransfers, + modeChoiceData.transitOccupancyLevel + ) + }.sum // TODO: Update with destination activity } object ModeChoiceMultinomialLogit extends StrictLogging { diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceRideHailIfAvailable.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceRideHailIfAvailable.scala index 5cb7aa10785..3761cda8481 100755 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceRideHailIfAvailable.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceRideHailIfAvailable.scala @@ -54,8 +54,9 @@ class ModeChoiceRideHailIfAvailable(val beamServices: BeamServices) extends Mode ): Double = 0.0 override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = 0.0 } diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceTransitIfAvailable.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceTransitIfAvailable.scala index 2bf8b7438bf..dc679a6d597 100755 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceTransitIfAvailable.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceTransitIfAvailable.scala @@ -56,9 +56,10 @@ class ModeChoiceTransitIfAvailable(val beamServices: 
BeamServices) extends ModeC ): Double = 0.0 override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = 0.0 } diff --git a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceUniformRandom.scala b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceUniformRandom.scala index 71ce672db60..7b645d29cf0 100755 --- a/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceUniformRandom.scala +++ b/src/main/scala/beam/agentsim/agents/choice/mode/ModeChoiceUniformRandom.scala @@ -44,8 +44,9 @@ class ModeChoiceUniformRandom(val beamConfig: BeamConfig) extends ModeChoiceCalc ): Double = 0.0 override def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double = 0.0 } diff --git a/src/main/scala/beam/agentsim/agents/freight/FreightEntities.scala b/src/main/scala/beam/agentsim/agents/freight/FreightEntities.scala index 779e6ea5507..e30bc18d79c 100644 --- a/src/main/scala/beam/agentsim/agents/freight/FreightEntities.scala +++ b/src/main/scala/beam/agentsim/agents/freight/FreightEntities.scala @@ -16,17 +16,26 @@ sealed abstract class FreightRequestType extends EnumEntry object FreightRequestType extends Enum[FreightRequestType] { val values: immutable.IndexedSeq[FreightRequestType] = findValues - case object Unloading extends FreightRequestType case object Loading extends FreightRequestType +} + +sealed abstract class FreightDeliveryType extends EnumEntry { val value: String } +object FreightDeliveryType extends Enum[FreightDeliveryType] { + val values: immutable.IndexedSeq[FreightDeliveryType] = findValues + case object 
B2B extends FreightDeliveryType { override val value = "b2b" } + case object B2C extends FreightDeliveryType { override val value = "b2c" } + case object Whatever extends FreightDeliveryType { override val value = "whatever" } + + def apply(s: String): FreightDeliveryType = { + if (s.trim.toLowerCase.startsWith(B2B.value) || s.trim.toLowerCase.contains(B2B.value)) B2B + else if (s.trim.toLowerCase.startsWith(B2C.value) || s.trim.toLowerCase.contains(B2C.value)) B2C + else Whatever + } } -case class FreightTour( - tourId: Id[FreightTour], - departureTimeInSec: Int, - maxTourDurationInSec: Int -) +case class FreightTour(tourId: Id[FreightTour], departureTimeInSec: Int, maxTourDurationInSec: Int) case class PayloadPlan( payloadId: Id[PayloadPlan], diff --git a/src/main/scala/beam/agentsim/agents/freight/FreightReplanner.scala b/src/main/scala/beam/agentsim/agents/freight/FreightReplanner.scala index d2debfe9302..00620ba08cf 100644 --- a/src/main/scala/beam/agentsim/agents/freight/FreightReplanner.scala +++ b/src/main/scala/beam/agentsim/agents/freight/FreightReplanner.scala @@ -143,7 +143,7 @@ class FreightReplanner( ): TimeDistanceCost = { val beamVehicleType = (for { vehicle <- maybeVehicle - vehicleType <- freightCarrier.fleet.get(Id.createVehicleId(vehicle.id)) + vehicleType <- beamServices.beamScenario.privateVehicles.get(Id.createVehicleId(vehicle.id)) } yield vehicleType.beamVehicleType).getOrElse(freightCarrier.fleet.values.head.beamVehicleType) val fuelPrice: Double = beamServices.beamScenario.fuelTypePrices(beamVehicleType.primaryFuelType) @@ -186,9 +186,10 @@ class FreightReplanner( def solveForTheWholeFeet: Solution = { val vehicles = - freightCarrier.fleet.values - .map(beamVehicle => { + freightCarrier.fleet.keys + .map(beamVehicleId => { val departure = randomTimeAround(departureTime) + val beamVehicle = beamServices.beamScenario.privateVehicles(Id.createVehicleId(beamVehicleId)) toJspritVehicle(freightCarrier.carrierId, beamVehicle, departure) }) 
.toIndexedSeq @@ -207,7 +208,7 @@ class FreightReplanner( val tourSolutions = for { (vehicleId, tours) <- freightCarrier.tourMap - beamVehicle = freightCarrier.fleet(vehicleId) + beamVehicle = beamServices.beamScenario.privateVehicles(Id.createVehicleId(vehicleId)) tour <- tours services = freightCarrier.plansPerTour(tour.tourId).map(toService) vehicles = IndexedSeq(toJspritVehicle(freightCarrier.carrierId, beamVehicle, tour.departureTimeInSec)) diff --git a/src/main/scala/beam/agentsim/agents/freight/input/FreightReader.scala b/src/main/scala/beam/agentsim/agents/freight/input/FreightReader.scala index 85e824f9b69..b3fb12b73ac 100644 --- a/src/main/scala/beam/agentsim/agents/freight/input/FreightReader.scala +++ b/src/main/scala/beam/agentsim/agents/freight/input/FreightReader.scala @@ -42,7 +42,7 @@ trait FreightReader { vehicleTypes: Map[Id[BeamVehicleType], BeamVehicleType] ): IndexedSeq[FreightCarrier] - def calculatePayloadWeights(plans: IndexedSeq[PayloadPlan]): IndexedSeq[(Set[Id[PayloadPlan]], Double)] = { + private def calculatePayloadWeights(plans: IndexedSeq[PayloadPlan]): IndexedSeq[(Set[Id[PayloadPlan]], Double)] = { plans.foldLeft(IndexedSeq((Set.empty[Id[PayloadPlan]], 0.0))) { case (acc, PayloadPlan(payloadId, _, _, _, weight, Unloading, _, _, _, _, _, _, _)) => val (payloads, payloadWeight) = acc.last @@ -107,8 +107,8 @@ trait FreightReader { householdsFactory: HouseholdsFactory ): IndexedSeq[(FreightCarrier, Household, Plan, Person, Id[BeamVehicle])] = { carriers.flatMap { carrier => - val freightHouseholdId = createHouseholdId(carrier.carrierId) - val household = householdsFactory.createHousehold(freightHouseholdId) + val freightCarrierId = createHouseholdId(carrier.carrierId) + val household = householdsFactory.createHousehold(freightCarrierId) household.setIncome(new IncomeImpl(0, Income.IncomePeriod.year)) carrier.tourMap.map { case (vehicleId, tours) => val personId = createPersonId(carrier.carrierId, vehicleId) @@ -139,7 +139,7 @@ trait 
FreightReader { powertrain, vehicleType, vehicleManagerId = new AtomicReference( - VehicleManager.createOrGetReservedFor(carrierId.toString, VehicleManager.TypeEnum.Freight).managerId + VehicleManager.createOrGetReservedFor(carrierId.toString, Some(VehicleManager.TypeEnum.Freight)).managerId ), randomSeed ) @@ -147,7 +147,7 @@ trait FreightReader { vehicle } - protected def createFreightActivity( + private def createFreightActivity( activityType: String, locationUTM: Coord, endTime: Int, @@ -161,7 +161,7 @@ trait FreightReader { act } - protected def createFreightLeg(departureTime: Int): Leg = { + private def createFreightLeg(departureTime: Int): Leg = { val leg = PopulationUtils.createLeg(BeamMode.CAR.value) leg.setDepartureTime(departureTime) leg @@ -169,7 +169,7 @@ trait FreightReader { } object FreightReader { - val FREIGHT_ID_PREFIX = "freight" + val CARRIER_ID_PREFIX = "carrier" val FREIGHT_REQUEST_TYPE = "FreightRequestType" val PAYLOAD_WEIGHT_IN_KG = "PayloadWeightInKg" val PAYLOAD_IDS = "PayloadIds" diff --git a/src/main/scala/beam/agentsim/agents/freight/input/GenericFreightReader.scala b/src/main/scala/beam/agentsim/agents/freight/input/GenericFreightReader.scala index 5f000ca55c2..5e1d12c3ad3 100644 --- a/src/main/scala/beam/agentsim/agents/freight/input/GenericFreightReader.scala +++ b/src/main/scala/beam/agentsim/agents/freight/input/GenericFreightReader.scala @@ -1,7 +1,7 @@ package beam.agentsim.agents.freight.input import beam.agentsim.agents.freight._ -import beam.agentsim.agents.freight.input.FreightReader.{FREIGHT_ID_PREFIX, NO_CARRIER_ID, NO_VEHICLE_ID} +import beam.agentsim.agents.freight.input.FreightReader._ import beam.agentsim.agents.vehicles.{BeamVehicle, BeamVehicleType} import beam.agentsim.infrastructure.taz.{TAZ, TAZTreeMap} import beam.sim.common.GeoUtils @@ -125,16 +125,19 @@ class GenericFreightReader( ) } val operationDurationInSec = get("operationDurationInSec").toDouble.round.toInt - val activityType = if 
(config.generateFixedActivitiesDurations) { - s"${requestType.toString}|$operationDurationInSec" - } else { - requestType.toString - } val payloadId = get("payloadId").createId[PayloadPlan] val locationX = row.get("locationX") val locationY = row.get("locationY") + val deliveryType: FreightDeliveryType = FreightDeliveryType(payloadId.toString) + + val activityType = if (config.generateFixedActivitiesDurations) { + s"${deliveryType.value}|${requestType.toString}|$operationDurationInSec" + } else { + s"${deliveryType.value}|${requestType.toString}" + } + extractCoordInUtmOrTaz( locationX, locationY, @@ -253,8 +256,8 @@ class GenericFreightReader( val tourMap: Map[Id[BeamVehicle], IndexedSeq[FreightTour]] = carrierRows .groupBy(_.vehicleId) - .mapValues { rows => - rows + .mapValues { + _ //setting the tour warehouse location to be the carrier warehouse location .map(row => tours(row.tourId)) .sortBy(_.departureTimeInSec) @@ -293,10 +296,10 @@ class GenericFreightReader( if (isGoodsCarrier) if (carrierIdStr == null || carrierIdStr.isBlank) NO_CARRIER_ID else carrierIdStr.createId - else s"${FREIGHT_ID_PREFIX}Carrier-$carrierIdStr".createId[FreightCarrier] + else s"$CARRIER_ID_PREFIX-$carrierIdStr".createId[FreightCarrier] val tourId: Id[FreightTour] = get("tourId").createId val vehicleId: Id[BeamVehicle] = - if (isGoodsCarrier) NO_VEHICLE_ID else Id.createVehicleId(s"${FREIGHT_ID_PREFIX}Vehicle-$vehicleIdStr") + if (isGoodsCarrier) NO_VEHICLE_ID else Id.createVehicleId(s"${CARRIER_ID_PREFIX}Vehicle-$vehicleIdStr") val vehicleTypeId: Id[BeamVehicleType] = if (isGoodsCarrier) "no-type".createId else get("vehicleTypeId").createId if (!existingAllTours.contains(tourId)) { logger.error(f"Following freight carrier row discarded because tour $tourId was filtered out: $row") @@ -378,14 +381,14 @@ class GenericFreightReader( @Override def createPersonId(carrierId: Id[FreightCarrier], vehicleId: Id[BeamVehicle]): Id[Person] = { - val updatedVehicleId = 
vehicleId.toString.replace(FREIGHT_ID_PREFIX + "Vehicle-", "") - Id.createPersonId(s"${FREIGHT_ID_PREFIX}Driver-$updatedVehicleId") + val updatedVehicleId = vehicleId.toString.replace(s"${CARRIER_ID_PREFIX}Vehicle-", "") + Id.createPersonId(s"${CARRIER_ID_PREFIX}Driver-$updatedVehicleId") } @Override def createHouseholdId(carrierId: Id[FreightCarrier]): Id[Household] = { val updatedCarrierId = carrierId.toString.replace(FREIGHT_ID_PREFIX + "Carrier-", "") - s"${FREIGHT_ID_PREFIX}Household-$updatedCarrierId".createId + s"carrier-$updatedCarrierId".createId } } diff --git a/src/main/scala/beam/agentsim/agents/household/HouseholdActor.scala b/src/main/scala/beam/agentsim/agents/household/HouseholdActor.scala index bf63b67759c..41631accf00 100755 --- a/src/main/scala/beam/agentsim/agents/household/HouseholdActor.scala +++ b/src/main/scala/beam/agentsim/agents/household/HouseholdActor.scala @@ -28,6 +28,7 @@ import beam.agentsim.events.SpaceTime import beam.agentsim.infrastructure.ChargingNetworkManager.ChargingPlugRequest import beam.agentsim.infrastructure.ParkingInquiry.{ParkingActivityType, ParkingSearchMode} import beam.agentsim.infrastructure.{ParkingInquiry, ParkingInquiryResponse} +import beam.agentsim.infrastructure.parking.ParkingType import beam.agentsim.scheduler.BeamAgentScheduler.{CompletionNotice, ScheduleTrigger} import beam.agentsim.scheduler.HasTriggerId import beam.agentsim.scheduler.Trigger.TriggerWithId @@ -127,6 +128,8 @@ object HouseholdActor { case class MobilityStatusResponse(streetVehicle: Vector[VehicleOrToken], triggerId: Long) extends HasTriggerId + case class RetryModeChoice(triggerId: Long) extends HasTriggerId + case class GetVehicleTypes(triggerId: Long) extends HasTriggerId case class VehicleTypesResponse(vehicleTypes: Set[BeamVehicleType], triggerId: Long) extends HasTriggerId @@ -159,7 +162,7 @@ object HouseholdActor { val population: org.matsim.api.core.v01.population.Population, val household: Household, vehicles: 
Map[Id[BeamVehicle], BeamVehicle], - fallbackHomeCoord: Coord, + fallbackInitialLocationCoord: Coord, sharedVehicleFleets: Seq[ActorRef] = Vector(), possibleSharedVehicleTypes: Set[BeamVehicleType], routeHistory: RouteHistory, @@ -196,8 +199,7 @@ object HouseholdActor { private var members: Map[Id[Person], PersonIdWithActorRef] = Map() - private val isFreightCarrier: Boolean = - household.getId.toString.startsWith(FreightReader.FREIGHT_ID_PREFIX) + private val isFreightCarrier: Boolean = household.getId.toString.startsWith(FreightReader.CARRIER_ID_PREFIX) // Data need to execute CAV dispatch private val cavPlans: mutable.ListBuffer[CAVSchedule] = mutable.ListBuffer() @@ -205,7 +207,7 @@ object HouseholdActor { private var personAndActivityToCav: Map[(Id[Person], Activity), BeamVehicle] = Map() private var personAndActivityToLegs: Map[(Id[Person], Activity), List[BeamLeg]] = Map() - private var householdMembersToActivityTypeAndLocation: Map[Id[Person], HomeAndStartingWorkLocation] = + private var householdMembersToActivityTypeAndLocation: Map[Id[Person], ActivityTypeAndLocation] = Map() private val trackingCAVAssignmentAtInitialization = mutable.HashMap.empty[Id[BeamVehicle], Id[Person]] private val householdVehicleCategories = List(Car, Bike) @@ -235,9 +237,10 @@ object HouseholdActor { } person.getSelectedPlan.getPlanElements.asScala.find(_.isInstanceOf[Activity]) map { element => val act = element.asInstanceOf[Activity] - val parkingActivityType = ParkingInquiry.activityTypeStringToEnum(act.getType) + val actType = if (isFreightCarrier) ParkingActivityType.Depot.toString else act.getType + val parkingActivityType = ParkingInquiry.activityTypeStringToEnum(actType) val endTime = act.getEndTime.orElseGet(() => DateUtils.getEndOfTime(beamServices.beamScenario.beamConfig)) - person.getId -> HomeAndStartingWorkLocation( + person.getId -> ActivityTypeAndLocation( parkingActivityType, act.getType, act.getCoord, @@ -247,15 +250,30 @@ object HouseholdActor { } .toMap - if 
(!householdMembersToActivityTypeAndLocation.exists(_._2.parkingActivityType == ParkingActivityType.Home)) { + if ( + isFreightCarrier && !householdMembersToActivityTypeAndLocation.exists( + _._2.parkingActivityType == ParkingActivityType.Depot + ) + ) { + householdMembersToActivityTypeAndLocation ++= Map( + Id.createPersonId("NoDriver") -> ActivityTypeAndLocation( + ParkingActivityType.Depot, + "Warehouse", + fallbackInitialLocationCoord, + DateUtils.getEndOfTime(beamServices.beamScenario.beamConfig) + ) + ) + } else if ( + !isFreightCarrier && !householdMembersToActivityTypeAndLocation.exists( + _._2.parkingActivityType == ParkingActivityType.Home + ) + ) { householdMembersToActivityTypeAndLocation ++= Map( - Id.createPersonId("") -> HomeAndStartingWorkLocation( + Id.createPersonId("NoDriver") -> ActivityTypeAndLocation( ParkingActivityType.Home, "Home", - fallbackHomeCoord, - DateUtils.getEndOfTime( - beamServices.beamScenario.beamConfig - ) + fallbackInitialLocationCoord, + DateUtils.getEndOfTime(beamServices.beamScenario.beamConfig) ) ) } @@ -285,6 +303,7 @@ object HouseholdActor { Some(new EmergencyHouseholdVehicleGenerator(household, beamScenario, vehiclesAdjustment, category)) else None, whoDrivesThisFreightVehicle, + isFreightCarrier, beamServices.matsimServices.getEvents, beamServices.geo, beamServices.beamConfig, @@ -336,7 +355,7 @@ object HouseholdActor { .map(_._1) .getOrElse(householdMembersToActivityTypeAndLocation.keys.head) trackingCAVAssignmentAtInitialization.put(cav.id, personId) - val HomeAndStartingWorkLocation(_, _, location, _) = householdMembersToActivityTypeAndLocation(personId) + val ActivityTypeAndLocation(_, _, location, _) = householdMembersToActivityTypeAndLocation(personId) cav.spaceTime = SpaceTime(location, 0) schedulerRef ! 
ScheduleTrigger(InitializeTrigger(0), cavDriverRef) cav.setManager(Some(self)) @@ -604,11 +623,12 @@ object HouseholdActor { .sequence(vehicles.filter(_._2.isCAV).values.map { vehicle => vehicle.setManager(Some(self)) val personId = trackingCAVAssignmentAtInitialization(vehicle.id) - val HomeAndStartingWorkLocation(_, activityType, location, endTime) = + val ActivityTypeAndLocation(_, activityType, location, endTime) = householdMembersToActivityTypeAndLocation(personId) val parkingDuration = endTime - tick for { ParkingInquiryResponse(stall, _, _) <- sendParkingOrChargingInquiry( + personId, vehicle, activityType, location, @@ -637,6 +657,7 @@ object HouseholdActor { } private def sendParkingOrChargingInquiry( + person: Id[Person], vehicle: BeamVehicle, activityType: String, location: Coord, @@ -647,6 +668,7 @@ object HouseholdActor { SpaceTime(location, 0), activityType, VehicleManager.getReservedFor(vehicle.vehicleManagerId.get).get, + personId = Option(person), beamVehicle = Option(vehicle), triggerId = triggerId, searchMode = ParkingSearchMode.Init, @@ -671,14 +693,19 @@ object HouseholdActor { } } + object EmergencyHouseholdVehicleGenerator { + private val sharedRandomGenerator = new UniformRealDistributionEnhanced() + } + class EmergencyHouseholdVehicleGenerator( household: Household, beamScenario: BeamScenario, vehiclesAdjustment: VehiclesAdjustment, defaultCategory: VehicleCategory ) extends LazyLogging { - private val realDistribution: UniformRealDistributionEnhanced = new UniformRealDistributionEnhanced() - realDistribution.reseedRandomGenerator(beamScenario.beamConfig.matsim.modules.global.randomSeed) + + private val realDistribution: UniformRealDistributionEnhanced = + EmergencyHouseholdVehicleGenerator.sharedRandomGenerator def sampleVehicleTypeForEmergencyUse( personId: Id[Person], @@ -733,7 +760,9 @@ object HouseholdActor { manager: ActorRef ): BeamVehicle = { val vehicleManagerId = - VehicleManager.createOrGetReservedFor(household.getId.toString, 
VehicleManager.TypeEnum.Household).managerId + VehicleManager + .createOrGetReservedFor(household.getId.toString, Some(VehicleManager.TypeEnum.Household)) + .managerId val vehicle = new BeamVehicle( Id.createVehicleId(personId.toString + "-emergency-" + vehicleIndex), new Powertrain(vehicleType.primaryFuelConsumptionInJoulePerMeter), @@ -750,7 +779,7 @@ object HouseholdActor { } } - case class HomeAndStartingWorkLocation( + case class ActivityTypeAndLocation( parkingActivityType: ParkingActivityType, activityType: String, activityLocation: Coord, diff --git a/src/main/scala/beam/agentsim/agents/household/HouseholdFleetManager.scala b/src/main/scala/beam/agentsim/agents/household/HouseholdFleetManager.scala index f4fcc1abce3..951fa1a10c7 100644 --- a/src/main/scala/beam/agentsim/agents/household/HouseholdFleetManager.scala +++ b/src/main/scala/beam/agentsim/agents/household/HouseholdFleetManager.scala @@ -36,9 +36,10 @@ class HouseholdFleetManager( parkingManager: ActorRef, chargingNetworkManager: ActorRef, vehicles: Map[Id[BeamVehicle], BeamVehicle], - homeAndStartingWorkLocations: Map[Id[Person], HomeAndStartingWorkLocation], + householdMembersToActivityTypeAndLocation: Map[Id[Person], ActivityTypeAndLocation], maybeEmergencyHouseholdVehicleGenerator: Option[EmergencyHouseholdVehicleGenerator], whoDrivesThisFreightVehicle: Map[Id[BeamVehicle], Id[Person]], // so far only freight module is using this collection + isFreightCarrier: Boolean, eventsManager: EventsManager, geo: GeoUtils, beamConfig: BeamConfig, @@ -62,6 +63,7 @@ class HouseholdFleetManager( logger.debug(s"ResolvedParkingResponses ($triggerId, $xs)") xs.foreach { case (id, resp) => val veh = vehiclesInternal(id) + val person = trackingVehicleAssignmentAtInitialization(id) veh.setManager(Some(self)) veh.spaceTime = SpaceTime(resp.stall.locationUTM.getX, resp.stall.locationUTM.getY, 0) veh.setMustBeDrivenHome(false) @@ -71,7 +73,7 @@ class HouseholdFleetManager( stall = resp.stall, locationWGS = 
geo.utm2Wgs(resp.stall.locationUTM), vehicleId = id, - driverId = "None" + driverId = person.toString ) eventsManager.processEvent(parkEvent) if (resp.stall.chargingPointType.isDefined) { @@ -95,22 +97,44 @@ class HouseholdFleetManager( val listOfFutures: List[Future[(Id[BeamVehicle], ParkingInquiryResponse)]] = { // Request that all household vehicles be parked at the home coordinate. If the vehicle is an EV, // send the request to the charging manager. Otherwise send request to the parking manager. - val workingPersonsList = - homeAndStartingWorkLocations.filter(_._2.parkingActivityType == ParkingActivityType.Work).keys.toBuffer + val workingPersonsList = householdMembersToActivityTypeAndLocation + .filter(_._2.parkingActivityType == ParkingActivityType.Work) + .keys + .toBuffer vehicles.toList.map { case (id, vehicle) => - val personId: Id[Person] = - if (workingPersonsList.nonEmpty) workingPersonsList.remove(0) - else + val personId: Id[Person] = { + if (vehicle.isFreightVehicle) { + homeAndStartingWorkLocations + .find(_._2.parkingActivityType == ParkingActivityType.Freight) + .map(_._1) + .getOrElse { + homeAndStartingWorkLocations.foreach { case (personId, location) => + println(s"Person ID: $personId") + println(s" Parking Activity Type: ${location.parkingActivityType}") + println(s" Activity Type: ${location.activityType}") + println(s" Activity Location: ${location.activityLocation}") + println(s" Activity End Time: ${location.activityEndTime}") + println("---") + } + throw new RuntimeException( + s"Freight vehicle ${vehicle.id} has no assigned person with Freight parking activity" + ) + } + } else if (workingPersonsList.isEmpty) { homeAndStartingWorkLocations .find(_._2.parkingActivityType == ParkingActivityType.Home) .map(_._1) - .getOrElse(homeAndStartingWorkLocations.keys.head) + .getOrElse(householdMembersToActivityTypeAndLocation.keys.head) +} else workingPersonsList.remove(0) + } trackingVehicleAssignmentAtInitialization.put(vehicle.id, personId) - 
val HomeAndStartingWorkLocation(_, activityType, location, endTime) = homeAndStartingWorkLocations(personId) + val ActivityTypeAndLocation(_, activityType, location, endTime) = + householdMembersToActivityTypeAndLocation(personId) val inquiry = ParkingInquiry.init( SpaceTime(location, 0), activityType, VehicleManager.getReservedFor(vehicle.vehicleManagerId.get).get, + personId = Option(personId), beamVehicle = Option(vehicle), triggerId = triggerId, searchMode = ParkingSearchMode.Init, @@ -170,7 +194,7 @@ class HouseholdFleetManager( case inquiry @ MobilityStatusInquiry(personId, _, _, requireVehicleCategoryAvailable, triggerId) => val availableVehicleMaybe: Option[BeamVehicle] = requireVehicleCategoryAvailable match { - case Some(_) if personId.toString.startsWith(FreightReader.FREIGHT_ID_PREFIX) => + case Some(_) if personId.toString.startsWith(FreightReader.CARRIER_ID_PREFIX) => whoDrivesThisFreightVehicle .filter(_._2 == personId) .flatMap { case (vehicleId, _) => availableVehicles.find(_.id == vehicleId) } @@ -193,7 +217,7 @@ class HouseholdFleetManager( requireVehicleCategoryAvailable match { case Some(requiredType) if vehicles.values.exists(_.beamVehicleType.vehicleCategory == requiredType) => logger.warn(s"Emergency vehicle generation for type $requiredType failed") - case Some(requiredType) => + case Some(_) => logger.debug(s"Ignoring vehicle request because it isn't for the right category") case None => } @@ -248,9 +272,15 @@ class HouseholdFleetManager( // Pipe my car through the parking manager // and complete initialization only when I got them all. + val reservedFor = VehicleManager.getReservedFor(vehicle.vehicleManagerId.get()).get + val activityType = if (reservedFor.managerType == VehicleManager.TypeEnum.Freight) { + ParkingActivityType.Freight.toString + } else { + ParkingActivityType.Wherever.toString + } val responseFuture = parkingManager ? 
ParkingInquiry.init( inquiry.whereWhen, - "wherever", + activityType, VehicleManager.getReservedFor(vehicle.vehicleManagerId.get()).get, Some(vehicle), triggerId = inquiry.triggerId, diff --git a/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala b/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala index aafb8207abf..e50b0e42823 100644 --- a/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala +++ b/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala @@ -5,7 +5,12 @@ import akka.pattern.pipe import beam.agentsim.agents.BeamAgent._ import beam.agentsim.agents.PersonAgent._ import beam.agentsim.agents._ -import beam.agentsim.agents.household.HouseholdActor.{MobilityStatusInquiry, MobilityStatusResponse, ReleaseVehicle} +import beam.agentsim.agents.household.HouseholdActor.{ + MobilityStatusInquiry, + MobilityStatusResponse, + ReleaseVehicle, + RetryModeChoice +} import beam.agentsim.agents.modalbehaviors.ChoosesMode._ import beam.agentsim.agents.modalbehaviors.DrivesVehicle.{ActualVehicle, Token, VehicleOrToken} import beam.agentsim.agents.planning.Strategy.{TourModeChoiceStrategy, TripModeChoiceStrategy} @@ -207,6 +212,12 @@ trait ChoosesMode { case (data: ChoosesModeData, Some(BIKE_TRANSIT | DRIVE_TRANSIT), Some(WALK_BASED)) if data.personData.currentTourPersonalVehicle.isDefined => val currentTourPersonalVehicleId = data.personData.currentTourPersonalVehicle.get + if (data.isWithinTripReplanning) { + logger.debug( + s"Person ${this.id} is within trip replanning. " + + s"Current tour personal vehicle id: $currentTourPersonalVehicleId" + ) + } if (beamVehicles.contains(currentTourPersonalVehicleId)) { self ! MobilityStatusResponse( Vector(beamVehicles(currentTourPersonalVehicleId)), @@ -325,6 +336,16 @@ trait ChoosesMode { } } + /** + * Sends a request to the given vehicle fleets to determine the availability of vehicles at a specific location and activity. 
+ * An optional vehicle category can be specified to filter the results. + * + * @param vehicleFleets the list of vehicle fleet actor references to query for available vehicles + * @param location the location and time for which to check vehicle availability + * @param activity the activity associated with the request, which may influence vehicle availability + * @param requireVehicleCategoryAvailable an optional vehicle category to filter available vehicles; if specified, only vehicles of this category will be included + * @return a Future containing a MobilityStatusResponse that includes a collection of available vehicles and a trigger ID + */ private def requestAvailableVehicles( vehicleFleets: Seq[ActorRef], location: SpaceTime, @@ -373,6 +394,14 @@ trait ChoosesMode { var currentTripMode = (currentTripStrategy.mode, personData.currentTripMode) match { case (None, None) => None + case (Some(strategyMode @ (DRIVE_TRANSIT | BIKE_TRANSIT | RIDE_HAIL_TRANSIT)), None) + if choosesModeData.isWithinTripReplanning => + logger.debug( + s"Keeping my _experiencedBeamPlan mode as $strategyMode and ChoosesModeData " + + "as WALK_TRANSIT because I missed my initial transit leg but want to keep my vehicle, my input chooses mode" + + "trip mode was set to None" + ) + Some(WALK_TRANSIT) case (Some(strategyMode), None) => Some(strategyMode) case (Some(strategyMode), Some(dataMode)) if strategyMode == dataMode => @@ -382,16 +411,17 @@ trait ChoosesMode { TripModeChoiceStrategy(Some(dataMode)) _experiencedBeamPlan.putStrategy(_experiencedBeamPlan.getTripContaining(nextAct), updatedTripStrategy) Some(dataMode) - case (Some(DRIVE_TRANSIT), Some(WALK_TRANSIT)) if choosesModeData.isWithinTripReplanning => - logger.debug( - "Keeping my _experiencedBeamPlan mode as DRIVE_TRANSIT and ChoosesModeData" + + case (Some(mode @ (DRIVE_TRANSIT | BIKE_TRANSIT | RIDE_HAIL_TRANSIT)), Some(WALK_TRANSIT)) + if choosesModeData.isWithinTripReplanning => + logger.warn( + f"Keeping my 
_experiencedBeamPlan mode as $mode and ChoosesModeData" + "as WALK_TRANSIT because I missed my initial transit leg but want to keep my vehicle" ) Some(WALK_TRANSIT) - case (Some(BIKE_TRANSIT), Some(WALK_TRANSIT)) if choosesModeData.isWithinTripReplanning => - logger.debug( - "Keeping my _experiencedBeamPlan mode as BIKE_TRANSIT and ChoosesModeData" + - "as WALK_TRANSIT because I missed my initial transit leg but want to keep my vehicle" + case (Some(WALK_TRANSIT), Some(DRIVE_TRANSIT)) if choosesModeData.isWithinTripReplanning => + logger.warn( + "Keeping my _experiencedBeamPlan mode as WALK_TRANSIT and ChoosesModeData" + + s"as DRIVE_TRANSIT, even though I don't know why. Full personData: $personData " ) Some(WALK_TRANSIT) case _ => @@ -432,18 +462,29 @@ trait ChoosesMode { case (None, Some(ps)) if ps.tourMode.contains(WALK_BASED) => Vector() case _ => - personData.currentTourPersonalVehicle.map(vehId => beamVehicles(vehId)).toVector + personData.currentTourPersonalVehicle + .flatMap(vehId => { + beamVehicles.get(vehId) match { + case Some(vehicle) => Some(vehicle) + case None => + logger.error(s"Vehicle with ID $vehId from currentTourPersonalVehicle not found in beamVehicles map") + //throw new NoSuchElementException(s"Vehicle ID $vehId not found") + None + } + }) + .toVector } availablePersonalStreetVehicles ++= availableVehicleFromParentTour val availableEmergencyVehicles = beamVehicles.filterKeys(k => k.toString.startsWith(f"${this.id.toString}-emergency")).values.toVector -// val otherNewAndTourVehicles = -// filterAvailableVehicles(availablePersonalStreetVehicles ++ availableEmergencyVehicles, currentTourStrategy) + // val otherNewAndTourVehicles = + // filterAvailableVehicles(availablePersonalStreetVehicles ++ availableEmergencyVehicles, currentTourStrategy) val otherNewAndTourVehicles = filterAvailableVehicles( availablePersonalStreetVehicles ++ availableEmergencyVehicles, - currentTourStrategy + currentTourStrategy, + parentTourStrategy.nonEmpty ).distinct 
val availableModesGivenTourMode = getAvailableModesGivenTourMode( @@ -472,10 +513,6 @@ trait ChoosesMode { triggerId ) - // Note that remainingAvailableVehicles includes all vehicles that were available, - // and any unused vehicles will be released. - // That's why we remove any drive_transit vehicles after - // replanning -- so they don't get released. val newPersonData = choosesModeData.copy( personData = personData .copy( @@ -561,15 +598,23 @@ trait ChoosesMode { // by R5. These don't necessarily have correct wait times, but R5 has been updated to give them appropriate costs. // Once we've chosen the best itinerary we can send requests to the RHM to fill in true costs and wait times - val rhTransitTrip = modeChoiceCalculator( - theRouterResult.itineraries.filter(_.tripClassifier == RIDE_HAIL_TRANSIT).toIndexedSeq, - matsimPlan.getPerson.getCustomAttributes - .get("beam-attributes") - .asInstanceOf[AttributesOfIndividual], - nextActivity(choosesModeData.personData), - Some(currentActivity(choosesModeData.personData)), - Some(matsimPlan.getPerson) - ) + // val rhTransitTrip = modeChoiceCalculator( + // theRouterResult.itineraries.filter(_.tripClassifier == RIDE_HAIL_TRANSIT).toIndexedSeq, + // matsimPlan.getPerson.getCustomAttributes + // .get("beam-attributes") + // .asInstanceOf[AttributesOfIndividual], + // nextActivity(choosesModeData.personData), + // Some(currentActivity(choosesModeData.personData)), + // Some(matsimPlan.getPerson) + // ) + + val rhTransitTrip = theRouterResult.itineraries + .filter(trip => + (trip.tripClassifier == RIDE_HAIL_TRANSIT) && (trip.legs.head.beamLeg.startTime > (_currentTick.get + 300)) + ) match { + case Seq() => None + case x => Some(x.minBy(_.totalTravelTimeInSecs)) + } // If there's a drive-transit trip AND we don't have an error RH2Tr response (due to no desire to use RH) then seek RH on access and egress val newPersonData = @@ -580,11 +625,11 @@ trait ChoosesMode { ) ) { val accessSegment = - rhTransitTrip.get.legs.view + 
rhTransitTrip.get.legs .takeWhile(!_.beamLeg.mode.isMassTransit) .map(_.beamLeg) val egressSegment = - rhTransitTrip.get.legs.view.reverse.takeWhile(!_.beamLeg.mode.isTransit).reverse.map(_.beamLeg) + rhTransitTrip.get.legs.reverse.takeWhile(!_.beamLeg.mode.isTransit).reverse.map(_.beamLeg) val (accessId, accessResult) = if ( (accessSegment.map(_.travelPath.distanceInM).sum > 0) & accessSegment @@ -751,6 +796,12 @@ trait ChoosesMode { (choosesModeData.parkingRequestIds(parkingInquiryResponse.requestId) -> parkingInquiryResponse) ) stay using newPersonData + case Event(_: RetryModeChoice, choosesModeData: ChoosesModeData) => + val newPersonData = choosesModeData.copy( + routingFinished = true, + parkingRequestIds = Map.empty // Clear pending parking requests + ) + stay using newPersonData case Event(cavTripLegsResponse: CavTripLegsResponse, choosesModeData: ChoosesModeData) => stay using choosesModeData.copy(cavTripLegs = Some(cavTripLegsResponse)) //handling response with the shared vehicle nearby the egress legs @@ -813,24 +864,13 @@ trait ChoosesMode { ) } using completeChoiceIfReady) - private def correctCurrentTripModeAccordingToRules( - currentTripMode: Option[BeamMode], - personData: BasePersonData, - availableModes: Seq[BeamMode] - ): Option[BeamMode] = { - val replanningIsAvailable = - personData.numberOfReplanningAttempts < beamServices.beamConfig.beam.agentsim.agents.modalBehaviors.maximumNumberOfReplanningAttempts - currentTripMode match { - case Some(mode @ (HOV2_TELEPORTATION | HOV3_TELEPORTATION)) - if availableModes.contains(CAR) && replanningIsAvailable => - Some(mode) - case Some(mode) if availableModes.contains(mode) && replanningIsAvailable => Some(mode) - case Some(mode) if availableModes.contains(mode) => Some(WALK) - case None if !replanningIsAvailable => Some(WALK) - case _ => None - } - } - + /** + * Generates a sequence of parking inquiries for vehicles in the given itineraries, based on the + * chosen mode data and parking behavior of the 
vehicles. It checks which vehicles have already + * been requested for parking and creates inquiries for the remaining vehicles. + * + * @param choosesModeData Data related to the mode choice of the person, including parking + */ private def makeParkingInquiries( choosesModeData: ChoosesModeData, itineraries: Seq[EmbodiedBeamTrip] @@ -1049,6 +1089,19 @@ trait ChoosesMode { case object FinishingModeChoice extends BeamAgentState + /** + * Creates a sequence of ride-hail to transit itineraries based on the provided results for + * ride-hail access, ride-hail egress, and a drive transit trip. + * + * @param rideHail2TransitAccessResult the result of the ride-hail-to-transit access leg request, + * containing ride-hail vehicle options and associated data + * @param rideHail2TransitEgressResult the result of the ride-hail-to-transit egress leg request, + * containing ride-hail vehicle options and associated data + * @param driveTransitTrip the returned drive transit trip that will be turned into a + * ridehail transit trip + * @return a vector of possible ride-hail to transit itineraries as embodied trips; returns an empty + * vector if an itinerary cannot be generated + */ private def createRideHail2TransitItin( rideHail2TransitAccessResult: RideHailResponse, rideHail2TransitEgressResult: RideHailResponse, @@ -1093,29 +1146,43 @@ trait ChoosesMode { } else Vector.empty[EmbodiedBeamTrip] } + /** + * Creates a ride-hail transit trip by combining a drive-transit trip, ride-hail access legs, + * and ride-hail egress legs, while adjusting for timing constraints and extra wait times. 
+ * + * @param driveTransitTrip the original drive-transit trip composed of a sequence of legs + * @param tncAccessLeg the sequence of ride-hail access legs used to reach the transit + * @param timeToCustomer time required for the ride-hail vehicle to reach the customer + * @param tncEgressLeg the sequence of ride-hail egress legs used after the transit + * @return an optional ride-hail transit trip combining the input components if timing constraints are satisfied, + * or None if the trip cannot be created due to excessive wait time + */ private def createRideHailTransitTrip( driveTransitTrip: EmbodiedBeamTrip, tncAccessLeg: Vector[EmbodiedBeamLeg], timeToCustomer: Int, tncEgressLeg: Vector[EmbodiedBeamLeg] ): Option[EmbodiedBeamTrip] = { - val transitLegs = driveTransitTrip.legs + val transitLegs = driveTransitTrip.legs.view .dropWhile(leg => !leg.beamLeg.mode.isTransit) .reverse .dropWhile(leg => !leg.beamLeg.mode.isTransit) + .reverse val (extraWaitTimeBuffer, accessLegAdjustment) = tncAccessLeg.filter(_.isRideHail) match { case Vector() => (Int.MaxValue, 0) case rhLegs => + val latenessToFirstTransitLeg = tncAccessLeg.last.beamLeg.endTime - transitLegs.head.beamLeg.startTime max 0 + val startTimeBufferForWaiting = + 300.0 + timeToCustomer.toDouble * 0.25 + latenessToFirstTransitLeg.toDouble val extraWaitTimeBuffer = rhLegs.last.beamLeg.endTime - - tncAccessLeg.map(_.beamLeg.duration).sum - timeToCustomer - _currentTick.get - val startTimeBufferForWaiting = math.max(300.0, timeToCustomer.toDouble * 0.5) - (extraWaitTimeBuffer, startTimeBufferForWaiting.floor.toInt) + tncAccessLeg.map(_.beamLeg.duration).sum - timeToCustomer - _currentTick.get - startTimeBufferForWaiting + (extraWaitTimeBuffer.floor.toInt, startTimeBufferForWaiting.floor.toInt) } - if (extraWaitTimeBuffer >= 300) { + if (extraWaitTimeBuffer > 0) { Some( - EmbodiedBeamTrip( + surroundWithWalkLegsIfNeededAndMakeTrip( Vector( tncAccessLeg.head.copy(beamLeg = 
tncAccessLeg.head.beamLeg.updateStartTime(tncAccessLeg.head.beamLeg.startTime - accessLegAdjustment) @@ -1172,9 +1239,18 @@ trait ChoosesMode { } } + /** + * Filters the available vehicles based on the current tour strategy. + * Only includes vehicles that align with the conditions defined by the method logic. + * + * @param allAvailableStreetVehicles A vector containing all street vehicles currently available. + * @param currentTourStrategy The strategy object representing the current tour mode and vehicle preferences. + * @return A vector of filtered vehicles or tokens meeting the specified conditions. + */ private def filterAvailableVehicles( allAvailableStreetVehicles: Vector[VehicleOrToken], - currentTourStrategy: TourModeChoiceStrategy + currentTourStrategy: TourModeChoiceStrategy, + onSubTour: Boolean = false ): Vector[VehicleOrToken] = { val tourVehicle = currentTourStrategy.tourVehicle val tourMode = currentTourStrategy.tourMode @@ -1188,7 +1264,8 @@ trait ChoosesMode { case ActualVehicle(beamVehicle) if tourVehicle.contains(beamVehicle.id) => Some(ActualVehicle(beamVehicle)) case ActualVehicle(beamVehicle) if BeamVehicle.isSharedTeleportationVehicle(beamVehicle.id) => Some(ActualVehicle(beamVehicle)) - case ActualVehicle(beamVehicle) if tourVehicle.isEmpty && tourMode.isDefined && beamVehicle.isMustBeDrivenHome => + case ActualVehicle(beamVehicle) + if tourVehicle.isEmpty && tourMode.isDefined && beamVehicle.isMustBeDrivenHome && !onSubTour => logger.debug( s"Person person ${this.id} is already on a walk based tour, and we have access to vehicle " + s" ${beamVehicle.id}, and we're" + @@ -1223,13 +1300,14 @@ trait ChoosesMode { availablePersonalStreetVehicles: Vector[VehicleOrToken], currentTourMode: Option[BeamTourMode], nextActivity: Activity, - maybeTourModeChoiceStrategy: Option[TourModeChoiceStrategy] = None + maybeTourModeChoiceStrategy: Option[TourModeChoiceStrategy] = None, + isSubTour: Boolean = false ): Seq[BeamMode] = { val 
maybeTourPersonalVehicle = maybeTourModeChoiceStrategy.flatMap(_.tourVehicle) availableModes.intersect(currentTourMode match { case Some(WALK_BASED) if availablePersonalStreetVehicles - .exists(_.vehicle.isMustBeDrivenHome) && isLastTripWithinTour(nextActivity) => + .exists(_.vehicle.isMustBeDrivenHome) && isLastTripWithinTour(nextActivity) && !isSubTour => val requiredEgressModes = availablePersonalStreetVehicles.flatMap { case veh: ActualVehicle => maybeTourPersonalVehicle match { @@ -1257,15 +1335,67 @@ trait ChoosesMode { }) } - def mustBeDrivenHome(vehicle: VehicleOrToken): Boolean = { - vehicle match { - case ActualVehicle(beamVehicle) => - beamVehicle.isMustBeDrivenHome - case _: Token => - false // is not a household vehicle + // Note that remainingAvailableVehicles includes all vehicles that were available, + // and any unused vehicles will be released. + // That's why we remove any drive_transit vehicles after + // replanning -- so they don't get released. + + /** + * Finds the set of modes that were queried based on the routing response, ride hail result, + * and the optional ride hail to transit routing request identifier. + * + * @param routingResponse The response of a routing request, containing the requested modes and details about transit usage. + * @param rideHailResult The result of a ride hail mode request, including whether the request was for pooled or non-pooled ride hail. + * @param rideHail2TransitRoutingRequestId An optional identifier to determine if ride hail to transit was part of the query. + * @return A set of Beam modes that were part of the query, which includes non-ride hail modes, + * direct ride hail modes, and ride hail transit modes. 
+ */ + private def findQueriedModes( + routingResponse: RoutingResponse, + rideHailResult: RideHailResponse, + rideHail2TransitRoutingRequestId: Option[Int] + ): Set[BeamMode] = { + val expectedNonRideHailModes = routingResponse.request match { + case Some(RoutingRequest(_, _, _, withTransit, _, streetVehicles, _, _, _, _, _)) if !withTransit => + streetVehicles.map(_.mode).toSet + case Some(RoutingRequest(_, _, _, true, _, streetVehicles, _, _, _, _, _)) => + streetVehicles + .map(_.mode) + .flatMap { + case CAR => Seq(DRIVE_TRANSIT, CAR) + case BIKE => Seq(BIKE_TRANSIT, BIKE) + case WALK => Seq(WALK_TRANSIT, WALK) + case _ => Seq.empty[BeamMode] + } + .toSet + case _ => Set.empty[BeamMode] + } + val expectedDirectRideHailModes = rideHailResult.request match { + case RideHailRequest(_, _, _, _, _, _, _, _, _, requestTime, _, _, _, _) if requestTime == -1 => + Set.empty[BeamMode] + case RideHailRequest(_, _, _, _, _, asPooled, _, _, _, _, _, _, _, _) if asPooled => + Set(RIDE_HAIL_POOLED) + case RideHailRequest(_, _, _, _, _, _, _, _, _, _, _, _, _, _) => Set(RIDE_HAIL) } + + val expectedRideHailTransitModes = rideHail2TransitRoutingRequestId match { + case Some(_) => Set(RIDE_HAIL_TRANSIT) + case None => Set.empty[BeamMode] + } + + expectedNonRideHailModes ++ expectedDirectRideHailModes ++ expectedRideHailTransitModes } + /** + * Handles the mode and vehicle choice for a person during their current activity in the simulation. + * This function evaluates available transportation options, including ride-hail, walking, and other transit methods, + * based on various criteria such as current location, tour strategy, and vehicle availability. It computes the + * best possible trip options, filters them, and determines the next course of action based on the chosen alternative. + * The method also updates the person's data with the selected trip mode and tour specifics. 
+ * + * @return A transformation of the FSM state, applying the updated mode choice and trip information for the specific person + * if all conditions are met. Only states matching the specified input conditions are handled. + */ private def completeChoiceIfReady: PartialFunction[State, State] = { case FSM.State( _, @@ -1278,7 +1408,7 @@ trait ChoosesMode { parkingResponseIds, Some(rideHailResult), Some(rideHail2TransitRoutingResponse), - _, + rideHail2TransitRoutingRequestId, Some(rideHail2TransitAccessResult), _, Some(rideHail2TransitEgressResult), @@ -1291,7 +1421,8 @@ trait ChoosesMode { _, _, true, - _ + _, + mostRecentFailedBoardingTrip ), _, _, @@ -1323,10 +1454,13 @@ trait ChoosesMode { Vector() } - val combinedItinerariesForChoice = rideHailItinerary ++ addParkingCostToItins( + val combinedItinerariesForChoice = (rideHailItinerary ++ addParkingCostToItins( routingResponse.itineraries, parkingResponses - ) ++ rideHail2TransitIineraries + ) ++ rideHail2TransitIineraries) + .groupBy(t => (t.vehiclesInTrip, t.tripClassifier)) + .map(x => x._2.minBy(_.totalTravelTimeInSecs)) + .toVector def isAvailable(mode: BeamMode): Boolean = combinedItinerariesForChoice.exists(_.tripClassifier == mode) @@ -1338,16 +1472,21 @@ trait ChoosesMode { case _ => } - val newAndTourVehicles = allAvailableStreetVehicles ++ getParentTourStrategy(personData) - .flatMap(_.tourVehicle) - .flatMap(v => beamVehicles.get(v)) - .filterNot(_.vehicle.isSharedVehicle) - .toVector - .groupBy(_.id) - .values - .map(_.head) + val availableParentTourVehicles = getParentTourStrategy(personData) + .flatMap(strategy => + strategy.tourMode match { + case Some(CAR_BASED | BIKE_BASED) => + strategy.tourVehicle + .flatMap(v => beamVehicles.get(v)) + .filterNot(_.vehicle.isSharedVehicle) + case _ => + None // If it's a walk_based tour we assume it was left at a transit stop en_route + } + ) .toVector + val newAndTourVehicles = allAvailableStreetVehicles ++ availableParentTourVehicles + val 
availableEmergencyVehicles = beamVehicles.filterKeys(k => k.toString.startsWith(f"${this.id.toString}-emergency")).values.toVector @@ -1360,32 +1499,29 @@ trait ChoosesMode { combinedItinerariesForChoice ) + val parentTourStrategy = getParentTourStrategy(personData) + val availableModesForTrips = getAvailableModesGivenTourMode( availableModesForPerson(matsimPlan.getPerson, choosesModeData.excludeModes), newAndTourVehicles, chosenCurrentTourMode, nextAct, - Some(currentTourStrategy) + Some(currentTourStrategy), + parentTourStrategy.nonEmpty ) + if (availableModesForTrips.contains(DRIVE_TRANSIT) && parentTourStrategy.nonEmpty) { + logger.debug("This is a strange situation to be worried about potentially") + } + val filteredItinerariesForChoice = choosesModeData.personData.currentTripMode match { case Some(mode) if mode == DRIVE_TRANSIT || mode == BIKE_TRANSIT => (isFirstOrLastTripWithinTour(nextAct), personData.hasDeparted) match { case (true, false) => combinedItinerariesForChoice.filter(_.tripClassifier == mode) case _ => - combinedItinerariesForChoice.filter(trip => - trip.tripClassifier == WALK_TRANSIT || trip.tripClassifier == RIDE_HAIL_TRANSIT - ) + combinedItinerariesForChoice } - case Some(mode) if mode == WALK_TRANSIT || mode == RIDE_HAIL_TRANSIT => - combinedItinerariesForChoice.filter(trip => - trip.tripClassifier == WALK_TRANSIT || trip.tripClassifier == RIDE_HAIL_TRANSIT - ) - case Some(HOV2_TELEPORTATION) => - combinedItinerariesForChoice.filter(_.tripClassifier == HOV2_TELEPORTATION) - case Some(HOV3_TELEPORTATION) => - combinedItinerariesForChoice.filter(_.tripClassifier == HOV3_TELEPORTATION) case Some(mode) => combinedItinerariesForChoice.filter(_.tripClassifier == mode) case _ => @@ -1398,9 +1534,25 @@ trait ChoosesMode { .filterNot(itin => itin.vehiclesInTrip .filterNot(_.toString.startsWith("body")) - .exists(veh => personData.failedTrips.flatMap(_.vehiclesInTrip).contains(veh)) + 
.exists(mostRecentFailedBoardingTrip.map(_.beamVehicleId).contains) ) + val currentAct = currentActivity(personData) + + if (beamServices.beamConfig.beam.exchange.output.activity_sim_skimmer.exists(_.primary.enabled)) { + val queriedModes = findQueriedModes(routingResponse, rideHailResult, rideHail2TransitRoutingRequestId) + + // Find modes that were queried but don't have valid itineraries and report failures + queriedModes.diff(combinedItinerariesForChoice.map(_.tripClassifier).toSet) foreach { beamMode => + val possibleActivitySimModes = + determineActivitySimPathTypesFromBeamMode(Some(beamMode), Some(currentAct)) + + createFailedActivitySimSkimmerEvent(currentAct, nextAct, possibleActivitySimModes).foreach(ev => + eventsManager.processEvent(ev) + ) + } + } + val attributesOfIndividual = matsimPlan.getPerson.getCustomAttributes .get("beam-attributes") @@ -1411,16 +1563,21 @@ trait ChoosesMode { goto(FinishingModeChoice) using choosesModeData.copy( personData = personData.copy( currentTourMode = chosenCurrentTourMode, + currentTripMode = Some(chosenTrip.tripClassifier), + passengerSchedule = PassengerSchedule(), + restOfCurrentTrip = List.empty[EmbodiedBeamLeg], currentTourPersonalVehicle = chosenCurrentTourMode match { // if they're on a walk based tour we let them keep access to whatever personal vehicle they used on the // first leg or in a parent tour case Some(WALK_BASED) => choosesModeData.personData.currentTourPersonalVehicle // Otherwise they keep track of the chosen vehicle case _ => - chosenCurrentTourPersonalVehicle.getOrElse( - chosenTrip, - choosesModeData.personData.currentTourPersonalVehicle - ) + chosenCurrentTourPersonalVehicle + .get(chosenTrip) + .flatten // If we're on a subtour and it uses no vehicle, we still pass on any tour vehicle from parent tours + .orElse( + choosesModeData.personData.currentTourPersonalVehicle + ) } ), pendingChosenTrip = Some(chosenTrip), @@ -1428,6 +1585,23 @@ trait ChoosesMode { ) } + if 
(personData.numberOfReplanningAttempts > 20) { + logger.warn( + s"Agent ${this.id} exceeded 20 replanning attempts at ${choosesModeData.currentLocation}. " + + s"Creating emergency walking trip. State: $choosesModeData" + ) + + val bushwhackingTrip = RoutingWorker.createBushwackingTrip( + choosesModeData.currentLocation.loc, + nextActivity(choosesModeData.personData).get.getCoord, + _currentTick.get, + body.toStreetVehicle, + geo + ) + + gotoFinishingModeChoice(bushwhackingTrip) + } + val currentPlanMode = _experiencedBeamPlan .getStrategy[TripModeChoiceStrategy](_experiencedBeamPlan.getTripContaining(nextAct)) .mode @@ -1440,9 +1614,11 @@ trait ChoosesMode { Some(matsimPlan.getPerson) ) match { case Some(chosenTrip) if !currentPlanMode.contains(CAV) => - filteredItinerariesForChoice.foreach { + // Send non-chosen trips to skimmer if configured to do so + combinedItinerariesForChoice.foreach { case possibleTrip - if (possibleTrip != chosenTrip) && beamScenario.beamConfig.beam.router.skim.sendNonChosenTripsToSkimmer => + if (possibleTrip != chosenTrip) && beamScenario.beamConfig.beam.router.skim.sendNonChosenTripsToSkimmer && !choosesModeData.personData.currentTourMode + .contains(FREIGHT_TOUR) => generateSkimData( possibleTrip.legs.lastOption.map(_.beamLeg.endTime).getOrElse(_currentTick.get), possibleTrip, @@ -1469,14 +1645,32 @@ trait ChoosesMode { choosesModeData.copy( personData = personData.copy( currentTourMode = chosenCurrentTourMode, + currentTripMode = Some(chosenTrip.tripClassifier), currentTourPersonalVehicle = chosenCurrentTourPersonalVehicle - .getOrElse(chosenTrip, personData.currentTourPersonalVehicle) + .get(chosenTrip) + .flatten // If we're on a subtour and it uses no vehicle, we still pass on any tour vehicle from parent tours + .orElse(personData.currentTourPersonalVehicle) ), pendingChosenTrip = Some(chosenTrip), availableAlternatives = availableAlts ) goto(FinishingModeChoice) using dataForNextStep case None => + if 
(!choosesModeData.personData.currentTourMode.contains(FREIGHT_TOUR)) { + combinedItinerariesForChoice.foreach { possibleTrip => + logger.debug( + f"Sending trip ${possibleTrip} to skimmer because it didn't match required mode ${currentPlanMode}" + ) + generateSkimData( + routingResponse.request.map(_.departureTime).getOrElse(_currentTick.get), + possibleTrip, + failedTrip = false, + personData.currentActivityIndex, + currentActivity(personData), + nextActivity(personData) + ) + } + } choosesModeData.personData.currentTripMode match { case Some(CAV) => // Special case, if you are using household CAV, no choice was necessary you just use this mode @@ -1553,43 +1747,139 @@ trait ChoosesMode { personData.copy(currentTourPersonalVehicle = None) ) case Some(mode) => - val correctedTripMode = correctCurrentTripModeAccordingToRules(None, personData, availableModesForTrips) - if (correctedTripMode != personData.currentTripMode) { - val nextActLoc = nextActivity(choosesModeData.personData).get.getCoord - val currentAct = currentActivity(personData) - val odFailedSkimmerEvent = createFailedODSkimmerEvent(currentAct, nextAct, mode) - eventsManager.processEvent( - odFailedSkimmerEvent - ) - if (beamServices.beamConfig.beam.exchange.output.activity_sim_skimmer.exists(_.primary.enabled)) { - val possibleActivitySimModes = - determineActivitySimPathTypesFromBeamMode( - choosesModeData.personData.currentTripMode, - Some(currentAct) - ) - createFailedActivitySimSkimmerEvent(currentAct, nextAct, possibleActivitySimModes).foreach(ev => - eventsManager.processEvent(ev) + val odFailedSkimmerEvent = createFailedODSkimmerEvent(currentAct, nextAct, mode) + eventsManager.processEvent(odFailedSkimmerEvent) + + // Generate activity sim failure events if enabled + if (beamServices.beamConfig.beam.exchange.output.activity_sim_skimmer.exists(_.primary.enabled)) { + val possibleActivitySimModes = + determineActivitySimPathTypesFromBeamMode( + choosesModeData.personData.currentTripMode, + 
Some(currentAct) ) - } - eventsManager.processEvent( - new ReplanningEvent( - _currentTick.get, - Id.createPersonId(id), - getReplanningReasonFrom( - choosesModeData.personData, - ReservationErrorCode.RouteNotAvailableForChosenMode.entryName - ), - choosesModeData.currentLocation.loc.getX, - choosesModeData.currentLocation.loc.getY, - nextActLoc.getX, - nextActLoc.getY + createFailedActivitySimSkimmerEvent(currentAct, nextAct, possibleActivitySimModes).foreach(ev => + eventsManager.processEvent(ev) + ) + } + createAndProcessModeChoiceEvent(None, _currentTick.get, choosesModeData) + + // Create replanning event + eventsManager.processEvent( + new ReplanningEvent( + _currentTick.get, + Id.createPersonId(id), + getReplanningReasonFrom( + choosesModeData.personData, + ReservationErrorCode.RouteNotAvailableForChosenMode.entryName + ), + choosesModeData.currentLocation.loc.getX, + choosesModeData.currentLocation.loc.getY, + nextAct.getCoord.getX, + nextAct.getCoord.getY + ) + ) + + if ( + isFirstTripWithinTour( + currentActivity(choosesModeData.personData) + ) && !choosesModeData.isWithinTripReplanning + ) { + logger.debug("Resetting tour mode to none because we haven't left yet") + updateTourModeStrategy( + None, + None, + nextActivity(choosesModeData.personData).get, + choosesModeData.allAvailableStreetVehicles + ) + } + + // Available vehicles filtering for replanning + val availableVehicles = + if (mode.isTeleportation) + // Remove teleportation vehicle since we can't use it for non-teleportation mode + choosesModeData.allAvailableStreetVehicles.filterNot(vehicle => + BeamVehicle.isSharedTeleportationVehicle(vehicle.id) ) - ) //give another chance to make a choice without predefined mode - //TODO: Do we need to do anything with tour mode here? 
- gotoChoosingModeWithoutPredefinedMode(choosesModeData) + else choosesModeData.allAvailableStreetVehicles + + // If we've done a comprehensive routing query, we can reuse results without more routing + if ( + choosesModeData.routingResponse.exists( + _.request.exists(_.withTransit) + ) && choosesModeData.rideHail2TransitRoutingRequestId.nonEmpty && !choosesModeData.isWithinTripReplanning && personData.numberOfReplanningAttempts == 0 + ) { + self ! RetryModeChoice(getCurrentTriggerId.get) + val updatedTripStrategy = TripModeChoiceStrategy(None) + _experiencedBeamPlan.putStrategy( + _experiencedBeamPlan.getTripContaining(nextActivity(choosesModeData.personData).get), + updatedTripStrategy + ) + + stay() using choosesModeData.copy( + personData = personData.copy( + currentTripMode = None, + numberOfReplanningAttempts = personData.numberOfReplanningAttempts + 1 + ), + allAvailableStreetVehicles = availableVehicles, + routingFinished = true, + excludeModes = choosesModeData.excludeModes ++ choosesModeData.personData.currentTripMode + ) } else { - val expensiveWalkTrip = createExpensiveWalkTrip(currentPersonLocation, nextAct, routingResponse) - gotoFinishingModeChoice(expensiveWalkTrip) + val (updatedVehicles, currentTourVehicle) = + if ( + (mode == DRIVE_TRANSIT || mode == BIKE_TRANSIT) && (isLastTripWithinTour( + nextAct + ) || personData.numberOfReplanningAttempts > 5) && personData.currentTourPersonalVehicle.isDefined + ) { + // Abandon the vehicle because we have no route to get it home + val vehicleId = personData.currentTourPersonalVehicle.get + logger.warn( + s"Agent ${this.id} is abandoning vehicle $vehicleId after ${personData.numberOfReplanningAttempts + 1} " + + s"failed attempts to find a route to take it home on a ${mode.toString} trip." 
+ ) + + val remainingVehicles = availableVehicles.filterNot(v => v.id == vehicleId) + updateTourModeStrategy( + currentTourStrategy.tourMode, + None, + nextActivity(choosesModeData.personData).get, + remainingVehicles + ) + // Release the vehicle + if (beamVehicles.contains(vehicleId)) { + val vehicle = beamVehicles(vehicleId).vehicle + vehicle.setMustBeDrivenHome(false) + vehicle.unsetDriver() + beamVehicles.remove(vehicleId) + } + (remainingVehicles, None) + } else { + (availableVehicles, personData.currentTourPersonalVehicle) + } + // Need to gather more routing options + self ! MobilityStatusResponse(availableVehicles, getCurrentTriggerId.get) + logger.debug( + "Person {} replanning because planned mode {} not available", + body.id, + mode.toString + ) + val updatedTripStrategy = TripModeChoiceStrategy(None) + _experiencedBeamPlan.putStrategy( + _experiencedBeamPlan.getTripContaining(nextActivity(choosesModeData.personData).get), + updatedTripStrategy + ) + stay() using ChoosesModeData( + personData = personData.copy( + currentTripMode = None, + numberOfReplanningAttempts = personData.numberOfReplanningAttempts + 1, + currentTourPersonalVehicle = currentTourVehicle + ), + allAvailableStreetVehicles = updatedVehicles, + currentLocation = choosesModeData.currentLocation, + excludeModes = choosesModeData.excludeModes ++ choosesModeData.personData.currentTripMode, + parkingRequestIds = Map.empty, // Clear any pending parking requests + mostRecentDeniedBoardingLeg = choosesModeData.mostRecentDeniedBoardingLeg + ) } case _ => // Bad things happen but we want them to continue their day, so we signal to downstream that trip should be made to be expensive @@ -1677,99 +1967,26 @@ trait ChoosesMode { .legs .head } - val expensiveWalkTrip = EmbodiedBeamTrip( - Vector(originalWalkTripLeg.copy(replanningPenalty = 10.0)) - ) - expensiveWalkTrip - } - - private def gotoChoosingModeWithoutPredefinedMode(choosesModeData: ChoosesModeData) = { - // TODO: Check modes for subsequent 
trips here - val onFirstTripWithinTour: Boolean = isFirstTripWithinTour(currentActivity(choosesModeData.personData)) - val withinReplanning: Boolean = choosesModeData.isWithinTripReplanning - val agentStillAtTourOrigin: Boolean = onFirstTripWithinTour && !withinReplanning - val outcomeTourMode = if (agentStillAtTourOrigin) { None } - else { Some(WALK_BASED) } - val parentTourVehicle = getParentTourStrategy(choosesModeData.personData).flatMap(_.tourVehicle) - val isAccessEgressInTour: Boolean = choosesModeData.personData.currentTourMode.contains(WALK_BASED) - val newTourVehicle = choosesModeData.personData.currentTourPersonalVehicle match { - case Some(id) if beamVehicles.contains(id) => - if (isAccessEgressInTour && !agentStillAtTourOrigin) { - /* - * This code block only runs when someone needs to re-plan and re-do mode choice. - * If for instance they were going to take a bike trip but no bike route was available - * they need to release the bike so others can use it. - * But if they're in the middle of a tour and just can't find a transit route, - * for instance, but they took drive_transit on their first leg and need to take it home, - * we keep the original vehicle in beamVehicles so we can use it later - * - * The problem is that when someone gets a resourceCapacityExhausted error on the first leg of a drive_transit tour, - * the existing logic thinks that we're in the first scenario (didn't use a vehicle so we can release it) - * rather than the second one (have already used a vehicle and need to return to it at the end of our tour). - * - * For that matter, we are adding "choosesModeData.isWithinTripReplanning". As long as we are still replanning - * we don't release the vehicle until their last tour trip of their tour. 
- * - * e.g., they'll just get on the next train, go about their drive_transit tour, and - * then take drive_transit as the mode for the last leg of their tour and pick up their car on the way home - * */ - Some(id) - } else if (parentTourVehicle.isEmpty) { - val vehicle = beamVehicles(id).vehicle - vehicle.setMustBeDrivenHome(false) - beamVehicles.remove(vehicle.id) - vehicle.getManager.get ! ReleaseVehicle(vehicle, getCurrentTriggerId.get) - if (!agentStillAtTourOrigin) { - logger.warn( - s"Abandoning vehicle $id because no return ${choosesModeData.personData.currentTripMode} " + - s"itinerary is available" - ) - } else { - logger.debug( - s"Not keeping vehicle $id because no ${choosesModeData.personData.currentTripMode} " + - s"is available" - ) - } - None - } else { - parentTourVehicle - } - case _ => None - } + val minDuration = + if (originalWalkTripLeg.beamLeg.duration < beamServices.beamConfig.beam.agentsim.schedulerParallelismWindow) { + logger.info( + s"Agent ${this.id}'s walk trip duration ${originalWalkTripLeg.beamLeg.duration} is less than the minimum " + + s"of ${beamServices.beamConfig.beam.agentsim.schedulerParallelismWindow}. Setting it to the minimum." + ) + beamServices.beamConfig.beam.agentsim.schedulerParallelismWindow + } else { + originalWalkTripLeg.beamLeg.duration + } - if (choosesModeData.personData.currentTripMode.get.isTeleportation) { - //we need to remove our teleportation vehicle since we cannot use it if it's not a teleportation mode { - val availableVehicles = choosesModeData.allAvailableStreetVehicles.filterNot(vehicle => - BeamVehicle.isSharedTeleportationVehicle(vehicle.id) - ) - self ! 
MobilityStatusResponse(availableVehicles, getCurrentTriggerId.get) - stay() - } else { - val updatedTripStrategy = TripModeChoiceStrategy(None) - _experiencedBeamPlan.putStrategy( - _experiencedBeamPlan.getTripContaining(nextActivity(choosesModeData.personData).get), - updatedTripStrategy - ) - updateTourModeStrategy( - outcomeTourMode, - newTourVehicle, - nextActivity(choosesModeData.personData).get, - choosesModeData.allAvailableStreetVehicles + val expensiveWalkTrip = EmbodiedBeamTrip( + Vector( + originalWalkTripLeg.copy( + replanningPenalty = 10.0, + beamLeg = originalWalkTripLeg.beamLeg.scaleToNewDuration(minDuration) + ) ) - goto(ChoosingMode) - } using choosesModeData.copy( - personData = choosesModeData.personData.copy( - currentTripMode = None, - currentTourMode = outcomeTourMode, - currentTrip = None, - restOfCurrentTrip = List.empty, - currentTourPersonalVehicle = newTourVehicle, - numberOfReplanningAttempts = choosesModeData.personData.numberOfReplanningAttempts + 1 - ), - currentLocation = choosesModeData.currentLocation, - excludeModes = choosesModeData.excludeModes ++ choosesModeData.personData.currentTripMode ) - + expensiveWalkTrip } /** @@ -1938,79 +2155,12 @@ trait ChoosesMode { val chosenTrip = makeFinalCorrections(pendingTrip, tick, correctedActivityEndTime) - // Write start and end links of chosen route into Activities. - // We don't check yet whether the incoming and outgoing routes agree on the link an Activity is on. - // Our aim should be that every transition from a link to another link be accounted for. 
- val headOpt = chosenTrip.legs.headOption - .flatMap(_.beamLeg.travelPath.linkIds.headOption) - val lastOpt = chosenTrip.legs.lastOption - .flatMap(_.beamLeg.travelPath.linkIds.lastOption) - if (headOpt.isDefined && lastOpt.isDefined) { - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex) - .setLinkId(Id.createLinkId(headOpt.get)) - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex + 1) - .setLinkId(Id.createLinkId(lastOpt.get)) - } else { - val origin = beamServices.geo.utm2Wgs( - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex) - .getCoord - ) - val destination = beamServices.geo.utm2Wgs( - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex + 1) - .getCoord - ) - val linkRadiusMeters = beamScenario.beamConfig.beam.routing.r5.linkRadiusMeters - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex) - .setLinkId( - Id.createLinkId( - beamServices.geo.getNearestR5Edge(transportNetwork.streetLayer, origin, linkRadiusMeters) - ) - ) - _experiencedBeamPlan - .activities(data.personData.currentActivityIndex + 1) - .setLinkId( - Id.createLinkId( - beamServices.geo.getNearestR5Edge(transportNetwork.streetLayer, destination, linkRadiusMeters) - ) - ) - } - - val tripId: String = _experiencedBeamPlan.trips - .lift(data.personData.currentActivityIndex + 1) match { - case Some(trip) => - trip.leg.map(l => Option(l.getAttributes.getAttribute("trip_id")).getOrElse("").toString).getOrElse("") - case None => "" - } + createAndProcessModeChoiceEvent(Some(chosenTrip), tick, data) val destinationActivity = nextActivity(data.personData).get val isFirstTrip = isFirstTripWithinTour(destinationActivity) val isLastTrip = isLastTripWithinTour(destinationActivity) - val initialTourMode = data.personData.currentTourMode - - val modeChoiceEvent = new ModeChoiceEvent( - tick, - id, - chosenTrip.tripClassifier.value, - initialTourMode.map(_.value).getOrElse(""), - 
data.expectedMaxUtilityOfLatestChoice.getOrElse[Double](Double.NaN), - _experiencedBeamPlan.activities(data.personData.currentActivityIndex).getLinkId.toString, - data.availableAlternatives.get, - data.availablePersonalStreetVehicles.nonEmpty, - chosenTrip.legs.view.map(_.beamLeg.travelPath.distanceInM).sum, - _experiencedBeamPlan.tourIndexOfElement(destinationActivity), - chosenTrip, - _experiencedBeamPlan.activities(data.personData.currentActivityIndex).getType, - destinationActivity.getType, - tripId - ) - eventsManager.processEvent(modeChoiceEvent) - data.personData.currentTripMode match { case Some(mode) if mode.isTeleportation => scheduler ! CompletionNotice( @@ -2048,14 +2198,23 @@ trait ChoosesMode { var isCurrentPersonalVehicleVoided = false vehiclesNotUsed.collect { case ActualVehicle(vehicle) if data.personData.currentTourPersonalVehicle.contains(vehicle.id) => - if ( - data.personData.currentTourMode - .contains(WALK_BASED) && (!isFirstTripWithinTour(destinationActivity) || data.isWithinTripReplanning) - ) { - logger.debug( - s"We're keeping vehicle ${vehicle.id} even though it isn't used in this trip " + - s"because we need it for egress at the end of the tour" - ) + if (data.personData.currentTourMode.contains(WALK_BASED)) { + // Note: Removed this condition: !isFirstTripWithinTour(destinationActivity) + if ( + getCurrentTourStrategy(data.personData).tourVehicle.contains( + vehicle.id + ) || data.isWithinTripReplanning + ) { + logger.debug( + s"Person ${this.id} is keeping vehicle ${vehicle.id} even though it isn't used in this trip " + + s"because we need it for egress at the end of the tour" + ) + } else if (getParentTourStrategy(data.personData).isEmpty) { + logger.warn( + s"Person ${this.id} is keeping vehicle ${vehicle.id} even though it's not stored in our " + + s"tourModeStrategy, which is ${getCurrentTourStrategy(data.personData)}" + ) + } } else if (getParentTourStrategy(data.personData).exists(s => s.tourVehicle.contains(vehicle.id))) { 
logger.debug( s"We're keeping vehicle ${vehicle.id} even though it isn't used in this trip " + @@ -2064,7 +2223,7 @@ trait ChoosesMode { } else { if (!data.isWithinTripReplanning) { logger.warn( - s"We are going to give up vehicle " + + s"Person ${this.id} is going to give up vehicle " + s"${vehicle.id} because it's not used in our next leg. Perhaps it was created unnecessarily? - $data" ) } @@ -2083,7 +2242,7 @@ trait ChoosesMode { beamVehicles.remove(vehicle.id) vehicle.getManager match { case Some(manager) if BeamVehicle.isEmergencyVehicle(vehicle.id) && !isLastTrip => - logger.debug("Releasing emergency vehicle") + logger.debug(f"Releasing emergency vehicle for person ${this.id}") manager ! ReleaseVehicle(vehicle, triggerId) case Some(manager) => manager ! ReleaseVehicle(vehicle, triggerId) case _ => logger.warn(s"Giving up vehicle ${vehicle.id}, which doesn't have a manager set") @@ -2116,7 +2275,7 @@ trait ChoosesMode { .filter(!_.vehicle.isSharedVehicle) .find { veh => (chosenTrip.tripClassifier, data.personData.currentTourMode) match { - case (_, Some(FREIGHT_TOUR)) => veh.vehicle.isFreightVehicle + case (_, Some(FREIGHT_TOUR)) => veh.vehicle.isFreight case (_, Some(CAR_BASED)) => veh.vehicle.beamVehicleType.vehicleCategory == VehicleCategory.Car case (_, Some(BIKE_BASED)) => veh.vehicle.beamVehicleType.vehicleCategory == VehicleCategory.Bike @@ -2158,12 +2317,22 @@ trait ChoosesMode { ) case Some(strategyMode) if strategyMode == chosenTrip.tripClassifier => case Some(strategyMode @ (DRIVE_TRANSIT | BIKE_TRANSIT | RIDE_HAIL_TRANSIT)) - if (chosenTrip.tripClassifier == WALK_TRANSIT) && data.isWithinTripReplanning => - logger.debug(f"Assigning replanning walk_transit trip as part of planned $strategyMode trip") + if data.isWithinTripReplanning => + if (chosenTrip.tripClassifier != WALK_TRANSIT) { + logger.info( + f"Switching to a ${chosenTrip.tripClassifier} trip after a failed $strategyMode trip after departure" + ) + } else { + logger.debug(f"Assigning 
replanning walk_transit trip as part of planned $strategyMode trip") + } case Some(otherMode) if currentTourPersonalVehicle.isDefined & isLastTrip => logger.warn( s"Chose a ${chosenTrip.tripClassifier} trip with a $otherMode leg in our plans. This is because " + - s"we need to tour vehicle ${currentTourPersonalVehicle.get} back home" + s"we need to take tour vehicle ${currentTourPersonalVehicle.get} back home. Updating it in plan" + ) + _experiencedBeamPlan.putStrategy( + _experiencedBeamPlan.getTripContaining(destinationActivity), + TripModeChoiceStrategy(Some(chosenTrip.tripClassifier)) ) case Some(otherMode) => logger.error( @@ -2206,6 +2375,17 @@ trait ChoosesMode { } } + /** + * Constructs and sends routing and mode choice requests to the appropriate services (e.g., router, ride hail manager). + * + * @param currentTripMode Optionally, the current mode of transportation for the trip. + * @param currentTourMode Optionally, the overall mode chosen for the tour. + * @param hasRideHail A flag indicating if ride hail services are available. + * @param availableVehicles A vector of available vehicles or tokens for the current person. + * @param choosesModeData Data structure containing information for mode choice decision-making. + * @param triggerId An identifier for the triggering event of the requests. + * @return A tuple containing placeholders for chooses mode response, an optional request ID, and an updated vector of vehicles or tokens. + */ private def makeRoutingRequests( currentTripMode: Option[BeamMode], currentTourMode: Option[BeamTourMode], @@ -2241,7 +2421,6 @@ trait ChoosesMode { possibleEgressVehicles = possibleEgressVehicles, triggerId = getCurrentTriggerIdOrGenerate ) - } def makeRideHailRequest(): Unit = { @@ -2258,7 +2437,6 @@ trait ChoosesMode { triggerId = getCurrentTriggerIdOrGenerate, asPooled = !choosesModeData.personData.currentTripMode.contains(RIDE_HAIL) ) - // println(s"requesting: ${inquiry.requestId}") rideHailManager ! 
inquiry } @@ -2274,7 +2452,7 @@ trait ChoosesMode { withTransit = true, Some(id), Vector(bodyStreetVehicleRequestParam, dummyRHVehicle.copy(locationUTM = currentSpaceTime)), - streetVehiclesUseIntermodalUse = AccessAndEgress, + streetVehiclesUseIntermodalUse = rideHailTransitIntermodalUse, triggerId = getCurrentTriggerIdOrGenerate ) router ! theRequest @@ -2307,6 +2485,44 @@ trait ChoosesMode { var requestId: Option[Int] = None // Form and send requests var householdVehiclesWereNotAvailable = false // to replan when personal vehicles are not available + + // Check if we should always query certain modes for skimming + val shouldAlwaysQueryTransit = beamScenario.beamConfig.beam.exchange.output.generateSkimsForAllModes + val shouldAlwaysQueryRideHailTransit = + shouldAlwaysQueryTransit & beamScenario.beamConfig.beam.exchange.output.generateSkimsForRideHailTransit + + val bufferToUse = choosesModeData.mostRecentDeniedBoardingLeg match { + case Some(transitLeg) => + // Get the departure time of the failed transit leg + val failedTransitDepartureTime = transitLeg.beamLeg.startTime + // Buffer to skip just past this transit departure + (failedTransitDepartureTime - _currentTick.get) + BUFFER_PER_REPLANNING_ATTEMPT_IN_SEC + + case None => + // Fallback to standard buffer if no failed transit leg + choosesModeData.personData.numberOfReplanningAttempts * BUFFER_PER_REPLANNING_ATTEMPT_IN_SEC + } + + // Track ride hail requests that have already been made + val (alreadyRequestedRideHail, alreadyRequestedRideHailTransit) = + if (shouldAlwaysQueryTransit) { + currentTripMode match { + case Some(RIDE_HAIL | RIDE_HAIL_POOLED) if choosesModeData.isWithinTripReplanning => (false, false) + case Some(RIDE_HAIL_TRANSIT) if choosesModeData.isWithinTripReplanning => + makeRideHailRequest() + (true, false) + case _ if hasRideHail => + makeRideHailRequest() + if (shouldAlwaysQueryRideHailTransit) { + requestId = makeRideHailTransitRoutingRequest(bodyStreetVehicle) + (true, true) + } else 
{ + (true, false) + } + case _ => (false, false) + } + } else { (false, false) } + currentTripMode match { case None => if (hasRideHail) { @@ -2315,8 +2531,8 @@ trait ChoosesMode { withRideHail = true, withRideHailTransit = !choosesModeData.isWithinTripReplanning ) - makeRideHailRequest() - if (!choosesModeData.isWithinTripReplanning) { + if (!alreadyRequestedRideHail) { makeRideHailRequest() } + if (!choosesModeData.isWithinTripReplanning & !alreadyRequestedRideHailTransit) { requestId = makeRideHailTransitRoutingRequest(bodyStreetVehicle) } } else { @@ -2348,36 +2564,57 @@ trait ChoosesMode { } :+ bodyStreetVehicle makeRequestWith( - withTransit = availableModesGivenTourMode.exists(_.isTransit), + withTransit = availableModesGivenTourMode.exists(_.isTransit) || shouldAlwaysQueryTransit, availableStreetVehiclesGivenTourMode, - possibleEgressVehicles = dummySharedVehicles + possibleEgressVehicles = dummySharedVehicles, + departureBuffer = bufferToUse ) case Some(WALK) => - responsePlaceholders = makeResponsePlaceholders(withRouting = true) - makeRequestWith(withTransit = true, Vector(bodyStreetVehicle)) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) + makeRequestWith(withTransit = shouldAlwaysQueryTransit, Vector(bodyStreetVehicle)) case Some(WALK_TRANSIT) => - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) makeRequestWith( withTransit = true, Vector(bodyStreetVehicle), - departureBuffer = choosesModeData.personData.numberOfReplanningAttempts * 5 + departureBuffer = bufferToUse ) case Some(CAV) => // Request from household the trip legs to put into trip householdRef ! 
CavTripLegsRequest(bodyVehiclePersonId, currentActivity(choosesModeData.personData)) - responsePlaceholders = makeResponsePlaceholders(withPrivateCAV = true) + responsePlaceholders = makeResponsePlaceholders( + withPrivateCAV = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) case Some(HOV2_TELEPORTATION) => val vehicles = availableVehicles .filter(v => BeamVehicle.isSharedTeleportationVehicle(v.id)) .map(car_vehicle => car_vehicle.streetVehicle.copy(mode = CAR_HOV2)) - makeRequestWith(withTransit = false, vehicles :+ bodyStreetVehicle) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + makeRequestWith(withTransit = shouldAlwaysQueryTransit, vehicles :+ bodyStreetVehicle) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) case Some(HOV3_TELEPORTATION) => val vehicles = availableVehicles .filter(v => BeamVehicle.isSharedTeleportationVehicle(v.id)) .map(car_vehicle => car_vehicle.streetVehicle.copy(mode = CAR_HOV3)) - makeRequestWith(withTransit = false, vehicles :+ bodyStreetVehicle) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + makeRequestWith(withTransit = shouldAlwaysQueryTransit, vehicles :+ bodyStreetVehicle) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) case Some(tripMode @ (CAR | BIKE | CAR_HOV2 | CAR_HOV3)) => val maybeLeg = _experiencedBeamPlan.getPlanElements .get(_experiencedBeamPlan.getPlanElements.indexOf(nextAct) - 1) match { @@ -2400,14 +2637,26 @@ trait ChoosesMode { nextAct.getCoord, triggerId ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + 
withRideHailTransit = alreadyRequestedRideHailTransit + ) case Some(vehicle) => logger.error(s"Agent ${this.id} is on a ${tripMode.value} trip but has vehicle ${vehicle.toString}") - makeRequestWith(withTransit = false, Vector(bodyStreetVehicle)) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + makeRequestWith(withTransit = shouldAlwaysQueryTransit, Vector(bodyStreetVehicle)) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) case _ => - makeRequestWith(withTransit = false, Vector(bodyStreetVehicle)) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) + makeRequestWith(withTransit = shouldAlwaysQueryTransit, Vector(bodyStreetVehicle)) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit + ) logger.error( "No vehicle available for existing route of person {} trip of mode {} even though it was created in their plans", this.id, @@ -2425,6 +2674,7 @@ trait ChoosesMode { if ( beamScenario.beamConfig.beam.agentsim.agents.vehicles.replanOnTheFlyWhenHouseholdVehiclesAreNotAvailable && vehicles.isEmpty ) { + val currentCoordWgs = beamServices.geo.utm2Wgs(currentPersonLocation.loc) eventsManager.processEvent( new ReplanningEvent( departTime, @@ -2433,18 +2683,27 @@ trait ChoosesMode { choosesModeData.personData, ReservationErrorCode.HouseholdVehicleNotAvailable.entryName ), - currentPersonLocation.loc.getX, - currentPersonLocation.loc.getY + currentCoordWgs.getX, + currentCoordWgs.getY ) ) householdVehiclesWereNotAvailable = true logger.warn("No HH vehicle available so going back to replanning") } - makeRequestWith(withTransit = householdVehiclesWereNotAvailable, vehicles :+ bodyStreetVehicle) - responsePlaceholders = - makeResponsePlaceholders(withRouting = true, withRideHail = 
householdVehiclesWereNotAvailable) - if (householdVehiclesWereNotAvailable) { + makeRequestWith( + withTransit = householdVehiclesWereNotAvailable | shouldAlwaysQueryTransit, + vehicles :+ bodyStreetVehicle + ) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = householdVehiclesWereNotAvailable | alreadyRequestedRideHail, + withRideHailTransit = householdVehiclesWereNotAvailable | alreadyRequestedRideHailTransit + ) + if (householdVehiclesWereNotAvailable & !alreadyRequestedRideHail) { makeRideHailRequest() + if (!choosesModeData.isWithinTripReplanning & !alreadyRequestedRideHailTransit) { + requestId = makeRideHailTransitRoutingRequest(bodyStreetVehicle) + } } } case Some(mode @ (DRIVE_TRANSIT | BIKE_TRANSIT)) => @@ -2462,70 +2721,91 @@ trait ChoosesMode { makeRequestWith( withTransit = true, filterStreetVehiclesForQuery(availableVehicles.map(_.streetVehicle), vehicleMode) - :+ bodyStreetVehicle + :+ bodyStreetVehicle, + departureBuffer = bufferToUse + ) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) } else { // Reset available vehicles so we don't release our car that we've left during this replanning resetVehicles = true makeRequestWith( withTransit = true, Vector(bodyStreetVehicle), - departureBuffer = - choosesModeData.personData.numberOfReplanningAttempts * BUFFER_PER_REPLANNING_ATTEMPT_IN_SEC + departureBuffer = bufferToUse + ) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) } case (`lastTripIndex`, Some(currentTourPersonalVehicle)) => + val vehiclesForRouting = availableVehicles + .map(_.streetVehicle) + .filter(_.id == 
currentTourPersonalVehicle) + val intermodalUse: IntermodalUse = if (vehiclesForRouting.isEmpty) { + logger.error( + s"Agent ${this.id} has tour vehicle ${currentTourPersonalVehicle.toString} in PersonData but " + + s"has no available vehicles for routing on egress leg of drive transit trip" + ) + AccessAndOrEgress + } else { + Egress + } // At the end of the tour, only drive home a vehicle that we have also taken away from there. makeRequestWith( withTransit = true, - vehicles = Vector(bodyStreetVehicle), - streetVehiclesIntermodalUse = Access, - possibleEgressVehicles = availableVehicles - .map(_.streetVehicle) - .filter(_.id == currentTourPersonalVehicle) + vehiclesForRouting :+ bodyStreetVehicle, + streetVehiclesIntermodalUse = intermodalUse, + departureBuffer = bufferToUse ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) - case (`lastTripIndex`, None) => - // TODO: Is there a way to query egress vehicles near the destination? - makeRequestWith( - withTransit = true, - availableVehicles - .filter(veh => (veh.streetVehicle.mode == vehicleMode) && veh.vehicle.isSharedVehicle) - .map(_.streetVehicle) :+ bodyStreetVehicle + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + withRideHailTransit = alreadyRequestedRideHailTransit ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) case _ => - // Still go for it, because maybe there are some shared vehicles along the route - makeRequestWith( - withTransit = true, - availableVehicles - .filter(veh => (veh.streetVehicle.mode == vehicleMode) && veh.vehicle.isSharedVehicle) - .map(_.streetVehicle) - :+ bodyStreetVehicle + // Reset available vehicles so we don't release our car that we've left during this replanning + resetVehicles = true + makeRequestWith(withTransit = true, Vector(bodyStreetVehicle)) + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = alreadyRequestedRideHail, + 
withRideHailTransit = alreadyRequestedRideHailTransit ) - responsePlaceholders = makeResponsePlaceholders(withRouting = true) } case Some(RIDE_HAIL | RIDE_HAIL_POOLED) if choosesModeData.isWithinTripReplanning => // Give up on all ride hail after a failure responsePlaceholders = makeResponsePlaceholders(withRouting = true) makeRequestWith(withTransit = true, Vector(bodyStreetVehicle)) case Some(RIDE_HAIL | RIDE_HAIL_POOLED) => - responsePlaceholders = makeResponsePlaceholders(withRouting = true, withRideHail = true) - makeRequestWith(withTransit = false, Vector(bodyStreetVehicle)) // We need a WALK alternative if RH fails - makeRideHailRequest() + responsePlaceholders = makeResponsePlaceholders( + withRouting = true, + withRideHail = true, + withRideHailTransit = alreadyRequestedRideHailTransit + ) + makeRequestWith( + withTransit = shouldAlwaysQueryTransit, + Vector(bodyStreetVehicle) + ) // We need a WALK alternative if RH fails + if (!alreadyRequestedRideHail) { makeRideHailRequest() } case Some(RIDE_HAIL_TRANSIT) if choosesModeData.isWithinTripReplanning => // Give up on ride hail transit after a failure, too complicated, but try regular ride hail again responsePlaceholders = makeResponsePlaceholders(withRouting = true, withRideHail = true) - makeRequestWith(withTransit = true, Vector(bodyStreetVehicle)) - makeRideHailRequest() + makeRequestWith(withTransit = true, Vector(bodyStreetVehicle), departureBuffer = bufferToUse) + if (!alreadyRequestedRideHail) { makeRideHailRequest() } case Some(RIDE_HAIL_TRANSIT) => - responsePlaceholders = makeResponsePlaceholders(withRideHailTransit = true) - requestId = makeRideHailTransitRoutingRequest(bodyStreetVehicle) + responsePlaceholders = + makeResponsePlaceholders(withRideHailTransit = true, withRideHail = alreadyRequestedRideHail) + if (!alreadyRequestedRideHailTransit) { requestId = makeRideHailTransitRoutingRequest(bodyStreetVehicle) } case Some(m) => logDebug(m.toString) } + ( responsePlaceholders, requestId, @@ 
-2534,6 +2814,18 @@ trait ChoosesMode { ) } + /** + * Determines the tour mode and assigns vehicles for a trip or tour based on the given strategy, available modes, + * vehicles, and first leg itineraries. + * + * @param currentTourStrategy The current strategy for selecting the tour mode. + * @param currentTripMode The current mode of the trip, if already determined. + * @param availableVehicles A list of vehicles or tokens available for the person. + * @param choosesModeData The data used for mode choice decisions, containing person-related information. + * @param firstLegItineraries A collection of potential itineraries for the first leg of the trip. + * @return A tuple where the first element is the chosen tour mode (if any), and the second element is a mapping between + * embodied beam trips and the chosen vehicle IDs (if any). + */ private def chooseTourModeAndVehicle( currentTourStrategy: TourModeChoiceStrategy, currentTripMode: Option[BeamMode], @@ -2552,7 +2844,9 @@ trait ChoosesMode { case itin if currentTourStrategy.tourVehicle.exists(itin.vehiclesInTrip.contains) => itin -> currentTourStrategy.tourVehicle case itin if currentTourStrategy.tourVehicle.isEmpty && tourMode.isVehicleBased => - logger.warn("Vehicle based tour mode without vehicle defined") + if (tourMode != FREIGHT_TOUR) { + logger.warn("Vehicle based tour mode without vehicle defined") + } itin -> itin.legs.find(l => l.asDriver && (l.beamLeg.mode != WALK)).map(_.beamVehicleId) }.toMap ) @@ -2665,6 +2959,92 @@ trait ChoosesMode { } } + private def createAndProcessModeChoiceEvent( + chosenTripMaybe: Option[EmbodiedBeamTrip], + tick: Int, + data: ChoosesMode.ChoosesModeData + ): Unit = { + // Write start and end links of chosen route into Activities. + // We don't check yet whether the incoming and outgoing routes agree on the link an Activity is on. + // Our aim should be that every transition from a link to another link be accounted for. 
+ val headOpt = chosenTripMaybe.flatMap( + _.legs.headOption + .flatMap(_.beamLeg.travelPath.linkIds.headOption) + ) + val lastOpt = chosenTripMaybe.flatMap( + _.legs.lastOption + .flatMap(_.beamLeg.travelPath.linkIds.lastOption) + ) + if (headOpt.isDefined && lastOpt.isDefined) { + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex) + .setLinkId(Id.createLinkId(headOpt.get)) + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex + 1) + .setLinkId(Id.createLinkId(lastOpt.get)) + } else { + val origin = beamServices.geo.utm2Wgs( + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex) + .getCoord + ) + val destination = beamServices.geo.utm2Wgs( + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex + 1) + .getCoord + ) + val linkRadiusMeters = beamScenario.beamConfig.beam.routing.r5.linkRadiusMeters + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex) + .setLinkId( + Id.createLinkId( + beamServices.geo.getNearestR5Edge(transportNetwork.streetLayer, origin, linkRadiusMeters) + ) + ) + _experiencedBeamPlan + .activities(data.personData.currentActivityIndex + 1) + .setLinkId( + Id.createLinkId( + beamServices.geo.getNearestR5Edge(transportNetwork.streetLayer, destination, linkRadiusMeters) + ) + ) + } + + val tripId: String = _experiencedBeamPlan.trips + .lift(data.personData.currentActivityIndex + 1) match { + case Some(trip) => + trip.leg.map(l => Option(l.getAttributes.getAttribute("trip_id")).getOrElse("").toString).getOrElse("") + case None => "" + } + + val destinationActivity = nextActivity(data.personData).get + + val initialTourMode = data.personData.currentTourMode + + val tripDistance: Double = + chosenTripMaybe.map(_.legs.view.map(_.beamLeg.travelPath.distanceInM).sum).getOrElse(Double.NaN) + + val modeChoiceEvent = new ModeChoiceEvent( + tick, + id, + chosenTripMaybe + .map(_.tripClassifier.value) + 
.getOrElse(data.personData.currentTripMode.map(_.value).getOrElse("None")), + initialTourMode.map(_.value).getOrElse(""), + data.expectedMaxUtilityOfLatestChoice.getOrElse[Double](Double.NaN), + _experiencedBeamPlan.activities(data.personData.currentActivityIndex).getLinkId.toString, + data.availableAlternatives.getOrElse(""), + data.availablePersonalStreetVehicles.nonEmpty, + tripDistance, + _experiencedBeamPlan.tourIndexOfElement(destinationActivity), + chosenTripMaybe.getOrElse(EmbodiedBeamTrip.empty), + _experiencedBeamPlan.activities(data.personData.currentActivityIndex).getType, + destinationActivity.getType, + tripId + ) + eventsManager.processEvent(modeChoiceEvent) + } + private def updateTourModeStrategy( newTourMode: Option[BeamTourMode], newTourVehicle: Option[Id[BeamVehicle]], @@ -2674,6 +3054,8 @@ trait ChoosesMode { (newTourMode, newTourVehicle) match { case (Some(CAR_BASED), None) => logger.error("Why are we going into a car based tour without a car?") + case (None, _) => + logger.info(s"Resetting tour mode to None for person ${this.id}") case _ => } val currentTour = _experiencedBeamPlan.getTourContaining(nextActivity) @@ -2765,7 +3147,8 @@ object ChoosesMode { excludeModes: Set[BeamMode] = Set.empty[BeamMode], availableAlternatives: Option[String] = None, routingFinished: Boolean = false, - routingRequestToLegMap: Map[Int, TripIdentifier] = Map.empty + routingRequestToLegMap: Map[Int, TripIdentifier] = Map.empty, + mostRecentDeniedBoardingLeg: Option[EmbodiedBeamLeg] = None ) extends PersonData { override def currentVehicle: VehicleStack = personData.currentVehicle diff --git a/src/main/scala/beam/agentsim/agents/modalbehaviors/DrivesVehicle.scala b/src/main/scala/beam/agentsim/agents/modalbehaviors/DrivesVehicle.scala index 77d0a9a1599..91409065eee 100644 --- a/src/main/scala/beam/agentsim/agents/modalbehaviors/DrivesVehicle.scala +++ b/src/main/scala/beam/agentsim/agents/modalbehaviors/DrivesVehicle.scala @@ -18,7 +18,7 @@ import 
beam.agentsim.events.RefuelSessionEvent.NotApplicable import beam.agentsim.events._ import beam.agentsim.infrastructure.ChargingNetworkManager._ import beam.agentsim.infrastructure.ParkingInquiry.{ParkingActivityType, ParkingSearchMode} -import beam.agentsim.infrastructure.{ParkingInquiry, ParkingStall} +import beam.agentsim.infrastructure.{ParkingInquiry, ParkingNetworkManager, ParkingStall} import beam.agentsim.scheduler.BeamAgentScheduler.{CompletionNotice, ScheduleTrigger} import beam.agentsim.scheduler.Trigger.TriggerWithId import beam.agentsim.scheduler.{HasTriggerId, Trigger} @@ -384,15 +384,15 @@ trait DrivesVehicle[T <: DrivingData] extends BeamAgent[T] with Stash with Expon numberOfPassengers, currentLeg, getCurrentTripMode(data), - fuelConsumed.primaryFuel, - fuelConsumed.secondaryFuel, - currentBeamVehicle.primaryFuelLevelInJoules, - currentBeamVehicle.secondaryFuelLevelInJoules, - tollOnCurrentLeg, - payloadIds, - currentBeamVehicle.beamVehicleType.curbWeightInKg + payloadWeight, + fuelConsumed.primaryFuel.toFloat, + fuelConsumed.secondaryFuel.toFloat, + currentBeamVehicle.primaryFuelLevelInJoules.toFloat, + currentBeamVehicle.secondaryFuelLevelInJoules.toFloat, + tollOnCurrentLeg.toFloat, + payloadIds.toArray, + (currentBeamVehicle.beamVehicleType.curbWeightInKg + payloadWeight).toFloat, emissionsProfile, - riders + riders.toArray ) eventsManager.processEvent(pte) @@ -637,15 +637,15 @@ trait DrivesVehicle[T <: DrivingData] extends BeamAgent[T] with Stash with Expon numberOfPassengers, partiallyCompletedBeamLeg, getCurrentTripMode(data), - fuelConsumed.primaryFuel, - fuelConsumed.secondaryFuel, - currentBeamVehicle.primaryFuelLevelInJoules, - currentBeamVehicle.secondaryFuelLevelInJoules, - tollOnCurrentLeg, - payloadIds, - currentBeamVehicle.beamVehicleType.curbWeightInKg + payloadWeight, + fuelConsumed.primaryFuel.toFloat, + fuelConsumed.secondaryFuel.toFloat, + currentBeamVehicle.primaryFuelLevelInJoules.toFloat, + 
currentBeamVehicle.secondaryFuelLevelInJoules.toFloat, + tollOnCurrentLeg.toFloat, + payloadIds.toArray, + (currentBeamVehicle.beamVehicleType.curbWeightInKg + payloadWeight).toFloat, emissionsProfile, - riders + riders.toArray ) eventsManager.processEvent(pte) generateTCSEventIfPossible(pte) @@ -762,15 +762,14 @@ trait DrivesVehicle[T <: DrivingData] extends BeamAgent[T] with Stash with Expon currentBeamVehicle.id == currentVehicleUnderControl, currentBeamVehicle.id + " " + currentVehicleUnderControl ) - currentBeamVehicle.stall.foreach { theStall => - parkingManager ! ReleaseParkingStall(theStall, tick) - currentBeamVehicle.setLastVehicleTimeLink( - Some(tick), - theStall.link.map(_.getId.toString.toInt) - ) - } - currentBeamVehicle.unsetParkingStall() - + ParkingNetworkManager.handleReleasingParkingSpot( + tick, + currentBeamVehicle, + None, + id, + parkingManager, + eventsManager + ) case None => } val triggerToSchedule: Vector[ScheduleTrigger] = data.passengerSchedule diff --git a/src/main/scala/beam/agentsim/agents/modalbehaviors/ModeChoiceCalculator.scala b/src/main/scala/beam/agentsim/agents/modalbehaviors/ModeChoiceCalculator.scala index 883314585a1..826bbedf1d8 100755 --- a/src/main/scala/beam/agentsim/agents/modalbehaviors/ModeChoiceCalculator.scala +++ b/src/main/scala/beam/agentsim/agents/modalbehaviors/ModeChoiceCalculator.scala @@ -123,9 +123,10 @@ trait ModeChoiceCalculator { } def computeAllDayUtility( - trips: ListBuffer[EmbodiedBeamTrip], + trips: Map[EmbodiedBeamTrip, Map[String, Double]], person: Person, - attributesOfIndividual: AttributesOfIndividual + attributesOfIndividual: AttributesOfIndividual, + overrideAttributes: Boolean = false ): Double final def chooseRandomAlternativeIndex(alternatives: Seq[EmbodiedBeamTrip]): Int = { diff --git a/src/main/scala/beam/agentsim/agents/parking/ChoosesParking.scala b/src/main/scala/beam/agentsim/agents/parking/ChoosesParking.scala index ee808158340..d318c8ba7c9 100755 --- 
a/src/main/scala/beam/agentsim/agents/parking/ChoosesParking.scala +++ b/src/main/scala/beam/agentsim/agents/parking/ChoosesParking.scala @@ -176,6 +176,7 @@ trait ChoosesParking extends { this: PersonAgent => // Self type restricts this trait to only mix into a PersonAgent protected lazy val endOfSimulationTime: Int = DateUtils.getEndOfTime(beamServices.beamConfig) + var latestParkingInquiry: Option[ParkingInquiry] = None private def buildParkingInquiry(data: BasePersonData): ParkingInquiry = { val firstLeg = data.restOfCurrentTrip.head @@ -369,10 +370,19 @@ trait ChoosesParking extends { when(ChoosingParkingSpot) { case Event(ParkingInquiryResponse(stall, _, _), data) => + val tick = _currentTick.getOrElse(0) val distanceThresholdToIgnoreWalking = beamServices.beamConfig.beam.agentsim.thresholdForWalkingInMeters - val nextLeg = - data.passengerSchedule.schedule.keys.drop(data.currentLegPassengerScheduleIndex).head + val existingLeg = data.passengerSchedule.schedule.keys.drop(data.currentLegPassengerScheduleIndex).head + + val (startLegTriggerTick, nextLeg, fixedData) = if (existingLeg.startTime < tick) { + val rescheduledLeg = existingLeg.updateStartTime(tick) + val newSchedule = data.passengerSchedule.replaceLegWithSamePath(existingLeg, rescheduledLeg) + (tick, rescheduledLeg, data.asInstanceOf[BasePersonData].copy(passengerSchedule = newSchedule)) + } else { + (existingLeg.startTime, existingLeg, data) + } + currentBeamVehicle.setReservedParkingStall(Some(stall)) val distance = beamServices.geo.distUTMInMeters(stall.locationUTM, beamServices.geo.wgs2Utm(nextLeg.travelPath.endPoint.loc)) @@ -381,15 +391,15 @@ trait ChoosesParking extends { val (_, triggerId) = releaseTickAndTriggerId() scheduler ! 
CompletionNotice( triggerId, - Vector(ScheduleTrigger(StartLegTrigger(nextLeg.startTime, nextLeg), self)) + Vector(ScheduleTrigger(StartLegTrigger(startLegTriggerTick, nextLeg), self)) ) - val updatedData = data match { + val updatedData = fixedData match { case data: BasePersonData => data.copy(enrouteData = EnrouteData()) - case _ => data + case _ => fixedData } goto(WaitingToDrive) using updatedData } else { - val (updatedData, isEnrouting) = data match { + val (updatedData, isEnrouting) = fixedData match { case data: BasePersonData if data.enrouteData.isInEnrouteState => val updatedEnrouteData = data.enrouteData.copy(hasReservedFastChargerStall = @@ -397,7 +407,7 @@ trait ChoosesParking extends { ) (data.copy(enrouteData = updatedEnrouteData), updatedEnrouteData.isEnrouting) case _ => - (data, false) + (fixedData, false) } updatedData match { case data: BasePersonData if data.enrouteData.isInEnrouteState && !isEnrouting => @@ -405,7 +415,7 @@ trait ChoosesParking extends { val (tick, triggerId) = releaseTickAndTriggerId() scheduler ! 
CompletionNotice( triggerId, - Vector(ScheduleTrigger(StartLegTrigger(nextLeg.startTime, nextLeg), self)) + Vector(ScheduleTrigger(StartLegTrigger(startLegTriggerTick, nextLeg), self)) ) handleReleasingParkingSpot(tick, currentBeamVehicle, None, id, parkingManager, beamServices, eventsManager) goto(WaitingToDrive) using data.copy(enrouteData = EnrouteData()) diff --git a/src/main/scala/beam/agentsim/agents/planning/BeamPlan.scala b/src/main/scala/beam/agentsim/agents/planning/BeamPlan.scala index 70b0524e5fc..ff3f06e7df2 100755 --- a/src/main/scala/beam/agentsim/agents/planning/BeamPlan.scala +++ b/src/main/scala/beam/agentsim/agents/planning/BeamPlan.scala @@ -310,18 +310,6 @@ class BeamPlan extends Plan { } } - def lastTripOfCurrentTour(idx: Int): Boolean = { - if (idx <= 1) { - false - } else if (idx + 2 < activities.size) { - val nextTrip = getTourContaining(idx + 2) - val currentTrip = getTourContaining(idx + 1) - (nextTrip.tourId != currentTrip.tourId) - } else { - true - } - } - def tourIndexOfElement(planElement: PlanElement): Int = { (for (tour <- tours.zipWithIndex if tour._1 == getTourContaining(planElement)) yield tour._2).head diff --git a/src/main/scala/beam/agentsim/agents/ridehail/RideHailDepotManager.scala b/src/main/scala/beam/agentsim/agents/ridehail/RideHailDepotManager.scala index e4ba4a0e6eb..0f7c7c10725 100644 --- a/src/main/scala/beam/agentsim/agents/ridehail/RideHailDepotManager.scala +++ b/src/main/scala/beam/agentsim/agents/ridehail/RideHailDepotManager.scala @@ -15,7 +15,9 @@ import beam.agentsim.events.{ParkingEvent, SpaceTime} import beam.agentsim.infrastructure.ChargingNetworkManager._ import beam.agentsim.infrastructure.ParkingInquiry.ParkingSearchMode import beam.agentsim.infrastructure._ +import beam.agentsim.infrastructure.charging.ChargingPointType import beam.agentsim.infrastructure.parking._ +import beam.agentsim.infrastructure.taz.TAZ import beam.agentsim.scheduler.HasTriggerId import beam.router.BeamRouter.Location import 
beam.sim.config.BeamConfig @@ -275,13 +277,13 @@ trait RideHailDepotManager extends { * @param triggerId Long * @return */ - def sendChargingInquiry( + private def sendChargingInquiry( whenWhere: SpaceTime, beamVehicle: BeamVehicle, triggerId: Long ): Future[ParkingInquiryResponse] = { val (chargingTime, _) = beamVehicle.refuelingSessionDurationAndEnergyInJoulesForStall( - Some(ParkingStall.defaultFastChargingStall(whenWhere.loc)), + Some(RideHailDepotManager.getDefaultFastChargingStall(whenWhere.loc)), None, None, None @@ -304,6 +306,17 @@ trait RideHailDepotManager extends { object RideHailDepotManager { + private def getDefaultFastChargingStall(locationUTM: Location): ParkingStall = ParkingStall( + tazId = TAZ.DefaultTAZId, + parkingZoneId = ParkingZone.DefaultParkingZone.parkingZoneId, + locationUTM = locationUTM, + costInDollars = 0.0, + chargingPointType = Some(ChargingPointType.ChargingStationCcsComboType2), + pricingModel = Some(PricingModel.FlatFee(0)), + parkingType = ParkingType.Public, + reservedFor = VehicleManager.AnyManager + ) + sealed trait RefuelSource case object JustArrivedAtDepot extends RefuelSource case object DequeuedToCharge extends RefuelSource diff --git a/src/main/scala/beam/agentsim/agents/ridehail/RideHailMaster.scala b/src/main/scala/beam/agentsim/agents/ridehail/RideHailMaster.scala index 9b5ccda75f9..dd965a2fdec 100644 --- a/src/main/scala/beam/agentsim/agents/ridehail/RideHailMaster.scala +++ b/src/main/scala/beam/agentsim/agents/ridehail/RideHailMaster.scala @@ -55,7 +55,7 @@ class RideHailMaster( private val rideHailManagers: Map[String, RideHailManagerData] = beamServices.beamConfig.beam.agentsim.agents.rideHail.managers.map { managerConfig => val rideHailManagerId = - VehicleManager.createOrGetReservedFor(managerConfig.name, VehicleManager.TypeEnum.RideHail).managerId + VehicleManager.createOrGetReservedFor(managerConfig.name, Some(VehicleManager.TypeEnum.RideHail)).managerId val rideHailFleetInitializer = 
rideHailFleetInitializerProvider.get(managerConfig.name) val rhmActorRef = context.actorOf( Props( diff --git a/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicle.scala b/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicle.scala index e0a10f21ad6..8732e766e21 100755 --- a/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicle.scala +++ b/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicle.scala @@ -442,7 +442,8 @@ class BeamVehicle( def isSharedVehicle: Boolean = beamVehicleType.id.toString.startsWith("sharedVehicle") - def isFreightVehicle: Boolean = id.toString.startsWith("freightVehicle") + def isFreight: Boolean = + VehicleManager.getReservedFor(vehicleManagerId.get()).exists(_.managerType == VehicleManager.TypeEnum.Freight) def isCAV: Boolean = beamVehicleType.isConnectedAutomatedVehicle @@ -595,6 +596,10 @@ object BeamVehicle { vehicleId.toString.startsWith(idPrefixSharedTeleportationVehicle) } + def isFreightVehicle(vehicleId: Id[BeamVehicle]): Boolean = { + vehicleId.toString.startsWith("freightVehicle") + } + def isEmergencyVehicle(vehicleId: Id[BeamVehicle]): Boolean = { vehicleId.toString.contains("-emergency-") } diff --git a/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicleType.scala b/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicleType.scala index 959683ea058..f980358b9dd 100755 --- a/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicleType.scala +++ b/src/main/scala/beam/agentsim/agents/vehicles/BeamVehicleType.scala @@ -91,17 +91,18 @@ object VehicleCategory { case exception: Exception => throw new RuntimeException(f"Can not parse vehicle category: '$value'.", exception) } + val values: Vector[VehicleCategory] = Vector( + Body, + Bike, + Car, + MediumDutyPassenger, + Class2b3Vocational, + Class456Vocational, + Class78Vocational, + Class78Tractor + ) + private def fromStringOptional(value: String): Option[VehicleCategory] = { - Vector( - Body, - Bike, - Car, - MediumDutyPassenger, - Class2b3Vocational, - 
Class456Vocational, - Class78Vocational, - Class78Tractor - ) - .find(_.toString.equalsIgnoreCase(value)) + values.find(_.toString.equalsIgnoreCase(value)) } } diff --git a/src/main/scala/beam/agentsim/agents/vehicles/PassengerSchedule.scala b/src/main/scala/beam/agentsim/agents/vehicles/PassengerSchedule.scala index c5723c62f93..efb14309518 100755 --- a/src/main/scala/beam/agentsim/agents/vehicles/PassengerSchedule.scala +++ b/src/main/scala/beam/agentsim/agents/vehicles/PassengerSchedule.scala @@ -93,6 +93,38 @@ case class PassengerSchedule(schedule: TreeMap[BeamLeg, Manifest]) { schedule.map(keyVal => s"${keyVal._1.toString} -> ${keyVal._2.toString}").mkString("--") } + /** + * Replace a specific leg in the schedule with a new one (typically with an updated start time) + * Finds the leg with matching path regardless of start time + * + * @param oldLeg The leg to find (may not match exactly on start time) + * @param newLeg The replacement leg + * @return Updated PassengerSchedule with the leg replaced + */ + def replaceLegWithSamePath(oldLeg: BeamLeg, newLeg: BeamLeg): PassengerSchedule = { + // Try exact match first + if (schedule.contains(oldLeg)) { + val manifest = schedule(oldLeg) + new PassengerSchedule(schedule - oldLeg + (newLeg -> manifest)) + } else { + // Try to find by matching mode, duration and path (ignoring start time) + val matchingLeg = schedule.keys.find { leg => + leg.mode == oldLeg.mode && + leg.duration == oldLeg.duration && + BeamPath.compare(leg.travelPath, oldLeg.travelPath) == 0 + } + + matchingLeg match { + case Some(leg) => + val manifest = schedule(leg) + new PassengerSchedule(schedule - leg + (newLeg -> manifest)) + case None => + // No matching leg found, return unchanged + this + } + } + } + } //Specialized copy of Ordering.by[Tuple2] so we can control compare diff --git a/src/main/scala/beam/agentsim/agents/vehicles/VehicleManager.scala b/src/main/scala/beam/agentsim/agents/vehicles/VehicleManager.scala index 
dcee2218ee3..608020184e6 100644 --- a/src/main/scala/beam/agentsim/agents/vehicles/VehicleManager.scala +++ b/src/main/scala/beam/agentsim/agents/vehicles/VehicleManager.scala @@ -3,6 +3,7 @@ package beam.agentsim.agents.vehicles import beam.sim.config.BeamConfig import com.typesafe.scalalogging.LazyLogging import org.matsim.api.core.v01.Id +import beam.agentsim.agents.freight.input.FreightReader.CARRIER_ID_PREFIX import scala.collection.concurrent.TrieMap import scala.util.matching.Regex @@ -19,7 +20,7 @@ object VehicleManager extends LazyLogging { AnyManager.managerId -> AnyManager ) - val CustomReservedForRegex: Regex = """([\w-]+)\(([\w-]+)\)""".r.unanchored + private val CustomReservedForRegex: Regex = """([\w-]+)\(([\w-]+)\)""".r.unanchored object TypeEnum extends Enumeration { type VehicleManagerType = Value @@ -45,8 +46,21 @@ object VehicleManager extends LazyLogging { def getReservedFor(managerId: Id[VehicleManager]): Option[ReservedFor] = vehicleManagers.get(managerId) - def createOrGetReservedFor(idString: String, vehType: TypeEnum.VehicleManagerType): ReservedFor = { + def createOrGetReservedFor( + idString: String, + vehTypeMaybe: Option[TypeEnum.VehicleManagerType] = None + ): ReservedFor = { val vehId = Id.create(idString, classOf[VehicleManager]) + val vehType = vehTypeMaybe.getOrElse { + if (idString.startsWith(CARRIER_ID_PREFIX)) + TypeEnum.Freight + else if (idString.startsWith("ridehail")) + TypeEnum.RideHail + else if (idString.startsWith("shared")) + TypeEnum.Shared + else + TypeEnum.Household + } if (vehicleManagers.contains(vehId) && vehicleManagers(vehId).managerType != vehType) throw new RuntimeException("Duplicate vehicle manager ids is not allowed") val reservedFor = ReservedFor(vehId, vehType) @@ -59,9 +73,9 @@ object VehicleManager extends LazyLogging { case null | "" => Some(VehicleManager.AnyManager) case x if x == VehicleManager.AnyManager.managerId.toString => - Some(createOrGetReservedFor(reservedForString, TypeEnum.Default)) + 
Some(createOrGetReservedFor(reservedForString, Some(TypeEnum.Default))) case CustomReservedForRegex(kind, id) => - Some(createOrGetReservedFor(id, TypeEnum.withName(kind.trim.toLowerCase))) + Some(createOrGetReservedFor(id, Some(TypeEnum.withName(kind.trim.toLowerCase)))) case _ => None } @@ -70,16 +84,16 @@ object VehicleManager extends LazyLogging { val sharedFleets = cfgAgentSim.agents.vehicles.sharedFleets val rideHailManagers = cfgAgentSim.agents.rideHail.managers reservedForMaybe = reservedForString match { - case cfgAgentSim.agents.freight.name => Some(createOrGetReservedFor(reservedForString, TypeEnum.Freight)) + case cfgAgentSim.agents.freight.name => Some(createOrGetReservedFor(reservedForString, Some(TypeEnum.Freight))) case reservedFor if rideHailManagers.exists(_.name == reservedFor) => - Some(createOrGetReservedFor(reservedForString, TypeEnum.RideHail)) + Some(createOrGetReservedFor(reservedForString, Some(TypeEnum.RideHail))) case reservedFor if sharedFleets.exists(_.name == reservedFor) => - Some(createOrGetReservedFor(reservedForString, TypeEnum.Shared)) + Some(createOrGetReservedFor(reservedForString, Some(TypeEnum.Shared))) case _ => None } } - reservedForMaybe map { case ReservedFor(mngId, mngType) => createOrGetReservedFor(mngId.toString, mngType) } + reservedForMaybe } def reserveForToString(reservedFor: ReservedFor): String = { diff --git a/src/main/scala/beam/agentsim/events/ChargingPlugInEvent.scala b/src/main/scala/beam/agentsim/events/ChargingPlugInEvent.scala index a6c57f4ff03..5632d7cbb29 100644 --- a/src/main/scala/beam/agentsim/events/ChargingPlugInEvent.scala +++ b/src/main/scala/beam/agentsim/events/ChargingPlugInEvent.scala @@ -1,14 +1,14 @@ package beam.agentsim.events -import java.util - import beam.agentsim.infrastructure.ParkingStall -import org.matsim.api.core.v01.{Coord, Id} import org.matsim.api.core.v01.events.Event import org.matsim.api.core.v01.population.Person +import org.matsim.api.core.v01.{Coord, Id} import 
org.matsim.core.api.internal.HasPersonId import org.matsim.vehicles.Vehicle +import java.util + case class ChargingPlugInEvent( tick: Double, stall: ParkingStall, @@ -40,6 +40,7 @@ case class ChargingPlugInEvent( attributes.put(ATTRIBUTE_PRICING_MODEL, pricingModelString) attributes.put(ATTRIBUTE_CHARGING_TYPE, chargingPointString) attributes.put(ATTRIBUTE_PARKING_TAZ, stall.tazId.toString) + attributes.put(ATTRIBUTE_PARKING_ZONE_ID, stall.parkingZoneId.toString) attributes } @@ -57,4 +58,5 @@ object ChargingPlugInEvent { val ATTRIBUTE_PRICING_MODEL: String = "pricingModel" val ATTRIBUTE_CHARGING_TYPE: String = "chargingPointType" val ATTRIBUTE_PARKING_TAZ: String = "parkingTaz" + val ATTRIBUTE_PARKING_ZONE_ID: String = "parkingZoneId" } diff --git a/src/main/scala/beam/agentsim/events/ChargingPlugOutEvent.scala b/src/main/scala/beam/agentsim/events/ChargingPlugOutEvent.scala index 51925ee7a95..d3582e62a64 100644 --- a/src/main/scala/beam/agentsim/events/ChargingPlugOutEvent.scala +++ b/src/main/scala/beam/agentsim/events/ChargingPlugOutEvent.scala @@ -1,7 +1,5 @@ package beam.agentsim.events -import java.util - import beam.agentsim.infrastructure.ParkingStall import org.matsim.api.core.v01.Id import org.matsim.api.core.v01.events.Event @@ -9,6 +7,8 @@ import org.matsim.api.core.v01.population.Person import org.matsim.core.api.internal.HasPersonId import org.matsim.vehicles.Vehicle +import java.util + case class ChargingPlugOutEvent( tick: Double, stall: ParkingStall, @@ -49,6 +49,7 @@ case class ChargingPlugOutEvent( attributes.put(ATTRIBUTE_PRICING_MODEL, pricingModelString) attributes.put(ATTRIBUTE_CHARGING_TYPE, chargingPointString) attributes.put(ATTRIBUTE_PARKING_TAZ, stall.tazId.toString) + attributes.put(ATTRIBUTE_PARKING_ZONE_ID, stall.parkingZoneId.toString) attributes } @@ -66,5 +67,5 @@ object ChargingPlugOutEvent { val ATTRIBUTE_PRICING_MODEL: String = "pricingModel" val ATTRIBUTE_CHARGING_TYPE: String = "chargingPointType" val ATTRIBUTE_PARKING_TAZ: 
String = "parkingTaz" - + val ATTRIBUTE_PARKING_ZONE_ID: String = "parkingZoneId" } diff --git a/src/main/scala/beam/agentsim/events/LeavingParkingEvent.scala b/src/main/scala/beam/agentsim/events/LeavingParkingEvent.scala index 10d29e7cb2b..ddc20ae5720 100755 --- a/src/main/scala/beam/agentsim/events/LeavingParkingEvent.scala +++ b/src/main/scala/beam/agentsim/events/LeavingParkingEvent.scala @@ -23,7 +23,8 @@ case class LeavingParkingEvent( parkingType: ParkingType, pricingModel: Option[PricingModel], ChargingPointType: Option[ChargingPointType], - emissionsProfile: Option[EmissionsProfile] + emissionsProfile: Option[EmissionsProfile], + parkingZoneId: Id[ParkingZoneId] ) extends Event(time) with ScalaEvent { import LeavingParkingEvent._ @@ -42,6 +43,8 @@ case class LeavingParkingEvent( attr.put(ATTRIBUTE_EMISSIONS_PROFILE, emissionsProfile.map(BeamVehicleUtils.buildEmissionsString).getOrElse("")) attr.put(ATTRIBUTE_PARKING_DURATION, parkingDuration.toString) attr.put(ATTRIBUTE_COST, pricingModel.map(_.costInDollars.toString).getOrElse("0")) + attr.put(ATTRIBUTE_PARKING_ZONE_ID, parkingZoneId.toString) + attr } } @@ -74,6 +77,7 @@ object LeavingParkingEvent { val ATTRIBUTE_DRIVER_ID: String = "driver" val ATTRIBUTE_EMISSIONS_PROFILE: String = "emissions" val ATTRIBUTE_PARKING_DURATION: String = "duration" + val ATTRIBUTE_PARKING_ZONE_ID: String = "parkingZoneId" def apply( time: Double, @@ -93,7 +97,8 @@ object LeavingParkingEvent { stall.parkingType, stall.pricingModel, stall.chargingPointType, - emissionsProfile + emissionsProfile, + parkingZoneId = stall.parkingZoneId ) } @@ -124,5 +129,17 @@ object LeavingParkingEvent { chargingPointType, emissionsProfile ) + val parkingZoneId: Id[ParkingZoneId] = Id.create(attr(ATTRIBUTE_PARKING_ZONE_ID), classOf[ParkingZoneId]) + LeavingParkingEvent( + time, + personId, + vehicleId, + tazId, + score, + parkingType, + pricingModel, + chargingPointType, + parkingZoneId + ) } } diff --git 
a/src/main/scala/beam/agentsim/events/ParkingEvent.scala b/src/main/scala/beam/agentsim/events/ParkingEvent.scala index 7a87d1ef6e3..257c72a7c27 100755 --- a/src/main/scala/beam/agentsim/events/ParkingEvent.scala +++ b/src/main/scala/beam/agentsim/events/ParkingEvent.scala @@ -2,7 +2,7 @@ package beam.agentsim.events import beam.agentsim.infrastructure.ParkingStall import beam.agentsim.infrastructure.charging.ChargingPointType -import beam.agentsim.infrastructure.parking.{ParkingType, PricingModel} +import beam.agentsim.infrastructure.parking.{ParkingType, ParkingZoneId, PricingModel} import beam.agentsim.infrastructure.taz.TAZ import beam.sim.common.GeoUtils import com.typesafe.scalalogging.LazyLogging @@ -22,7 +22,8 @@ case class ParkingEvent( locationWGS: Coord, parkingType: ParkingType, pricingModel: Option[PricingModel], - chargingPointType: Option[ChargingPointType] + chargingPointType: Option[ChargingPointType], + parkingZoneId: Id[ParkingZoneId] ) extends Event(time) with ScalaEvent with LazyLogging { @@ -54,6 +55,7 @@ case class ParkingEvent( attr.put(ATTRIBUTE_PRICING_MODEL, pricingModelString) attr.put(ATTRIBUTE_CHARGING_TYPE, chargingPointString) attr.put(ATTRIBUTE_PARKING_TAZ, tazId.toString) + attr.put(ATTRIBUTE_PARKING_ZONE_ID, parkingZoneId.toString) attr } @@ -71,6 +73,7 @@ object ParkingEvent { val ATTRIBUTE_PRICING_MODEL: String = "pricingModel" val ATTRIBUTE_CHARGING_TYPE: String = "chargingPointType" val ATTRIBUTE_PARKING_TAZ: String = "parkingTaz" + val ATTRIBUTE_PARKING_ZONE_ID: String = "parkingZoneId" def apply( time: Double, @@ -88,7 +91,8 @@ object ParkingEvent { locationWGS = locationWGS, parkingType = stall.parkingType, pricingModel = stall.pricingModel, - chargingPointType = stall.chargingPointType + chargingPointType = stall.chargingPointType, + parkingZoneId = stall.parkingZoneId ) } @@ -105,6 +109,17 @@ object ParkingEvent { val pricingModel: Option[PricingModel] = attr.get(ATTRIBUTE_PRICING_MODEL).flatMap(PricingModel(_, 
attr.getOrElse(ATTRIBUTE_COST, "0"))) val chargingPointType: Option[ChargingPointType] = attr.get(ATTRIBUTE_CHARGING_TYPE).flatMap(ChargingPointType(_)) - new ParkingEvent(time, driverId, vehicleId, tazId, locationWGS, parkingType, pricingModel, chargingPointType) + val parkingZoneId = Id.create(attr(ATTRIBUTE_PARKING_ZONE_ID), classOf[ParkingZoneId]) + new ParkingEvent( + time, + driverId, + vehicleId, + tazId, + locationWGS, + parkingType, + pricingModel, + chargingPointType, + parkingZoneId + ) } } diff --git a/src/main/scala/beam/agentsim/events/PathTraversalEvent.scala b/src/main/scala/beam/agentsim/events/PathTraversalEvent.scala index 87cfc07dec4..9105910fe6d 100644 --- a/src/main/scala/beam/agentsim/events/PathTraversalEvent.scala +++ b/src/main/scala/beam/agentsim/events/PathTraversalEvent.scala @@ -30,76 +30,69 @@ case class PathTraversalEvent( arrivalTime: Int, mode: BeamMode, legLength: Double, - linkIds: IndexedSeq[Int], - linkTravelTime: IndexedSeq[Double], - startX: Double, - startY: Double, - endX: Double, - endY: Double, - primaryFuelConsumed: Double, - secondaryFuelConsumed: Double, - endLegPrimaryFuelLevel: Double, - endLegSecondaryFuelLevel: Double, - amountPaid: Double, + linkIds: Array[Int], + linkTravelTime: Array[Float], + startX: Float, + startY: Float, + endX: Float, + endY: Float, + primaryFuelConsumed: Float, + secondaryFuelConsumed: Float, + endLegPrimaryFuelLevel: Float, + endLegSecondaryFuelLevel: Float, + amountPaid: Float, fromStopIndex: Option[Int], toStopIndex: Option[Int], currentTripMode: Option[String], - payloadIds: IndexedSeq[Id[PayloadPlan]], - weight: Double, + payloadIds: Array[Id[PayloadPlan]], + weight: Float, emissionsProfile: Option[EmissionsProfile], - riders: IndexedSeq[Id[Person]] = Vector() + riders: Array[Id[Person]] = Array() ) extends Event(time) with ScalaEvent { import PathTraversalEvent._ def capacity: Int = seatingCapacity + standingRoomCapacity - def linkIdsJava: util.List[Int] = linkIds.asJava + def 
linkIdsJava: util.List[Int] = linkIds.toList.asJava override def getEventType: String = "PathTraversal" - private val filledAttrs: AtomicReference[util.Map[String, String]] = - new AtomicReference[util.Map[String, String]](null) - override def getAttributes: util.Map[String, String] = { - if (filledAttrs.get() != null) filledAttrs.get() - else { - val attr = super.getAttributes() - attr.put(ATTRIBUTE_VEHICLE_ID, vehicleId.toString) - attr.put(ATTRIBUTE_DRIVER_ID, driverId) - attr.put(ATTRIBUTE_VEHICLE_TYPE, vehicleType) - attr.put(ATTRIBUTE_LENGTH, legLength.toString) - attr.put(ATTRIBUTE_NUM_PASS, numberOfPassengers.toString) + val attr = super.getAttributes + attr.put(ATTRIBUTE_VEHICLE_ID, vehicleId.toString) + attr.put(ATTRIBUTE_DRIVER_ID, driverId) + attr.put(ATTRIBUTE_VEHICLE_TYPE, vehicleType) + attr.put(ATTRIBUTE_LENGTH, legLength.toString) + attr.put(ATTRIBUTE_NUM_PASS, numberOfPassengers.toString) - attr.put(ATTRIBUTE_DEPARTURE_TIME, departureTime.toString) - attr.put(ATTRIBUTE_ARRIVAL_TIME, arrivalTime.toString) - attr.put(ATTRIBUTE_MODE, mode.value) - attr.put(ATTRIBUTE_LINK_IDS, linkIds.mkString(",")) - attr.put(ATTRIBUTE_LINK_TRAVEL_TIME, linkTravelTime.map(FormatUtils.DECIMAL_3.format).mkString(",")) - attr.put(ATTRIBUTE_PRIMARY_FUEL_TYPE, primaryFuelType) - attr.put(ATTRIBUTE_SECONDARY_FUEL_TYPE, secondaryFuelType) - attr.put(ATTRIBUTE_PRIMARY_FUEL, primaryFuelConsumed.toString) - attr.put(ATTRIBUTE_SECONDARY_FUEL, secondaryFuelConsumed.toString) - attr.put(ATTRIBUTE_VEHICLE_CAPACITY, capacity.toString) + attr.put(ATTRIBUTE_DEPARTURE_TIME, departureTime.toString) + attr.put(ATTRIBUTE_ARRIVAL_TIME, arrivalTime.toString) + attr.put(ATTRIBUTE_MODE, mode.value) + attr.put(ATTRIBUTE_LINK_IDS, linkIds.mkString(",")) + attr.put(ATTRIBUTE_LINK_TRAVEL_TIME, linkTravelTime.map(FormatUtils.DECIMAL_3.format).mkString(",")) + attr.put(ATTRIBUTE_PRIMARY_FUEL_TYPE, primaryFuelType) + attr.put(ATTRIBUTE_SECONDARY_FUEL_TYPE, secondaryFuelType) + 
attr.put(ATTRIBUTE_PRIMARY_FUEL, primaryFuelConsumed.toString) + attr.put(ATTRIBUTE_SECONDARY_FUEL, secondaryFuelConsumed.toString) + attr.put(ATTRIBUTE_VEHICLE_CAPACITY, capacity.toString) - attr.put(ATTRIBUTE_START_COORDINATE_X, startX.toString) - attr.put(ATTRIBUTE_START_COORDINATE_Y, startY.toString) - attr.put(ATTRIBUTE_END_COORDINATE_X, endX.toString) - attr.put(ATTRIBUTE_END_COORDINATE_Y, endY.toString) - attr.put(ATTRIBUTE_END_LEG_PRIMARY_FUEL_LEVEL, endLegPrimaryFuelLevel.toString) - attr.put(ATTRIBUTE_END_LEG_SECONDARY_FUEL_LEVEL, endLegSecondaryFuelLevel.toString) - attr.put(ATTRIBUTE_SEATING_CAPACITY, seatingCapacity.toString) - attr.put(ATTRIBUTE_TOLL_PAID, amountPaid.toString) - attr.put(ATTRIBUTE_FROM_STOP_INDEX, fromStopIndex.map(_.toString).getOrElse("")) - attr.put(ATTRIBUTE_TO_STOP_INDEX, toStopIndex.map(_.toString).getOrElse("")) - attr.put(ATTRIBUTE_CURRENT_TRIP_MODE, currentTripMode.getOrElse("")) - attr.put(ATTRIBUTE_PAYLOAD_IDS, payloadIds.mkString(",")) - attr.put(ATTRIBUTE_WEIGHT, weight.toString) - attr.put(ATTRIBUTE_RIDERS, ridersToStr(riders)) - attr.put(EMISSIONS_PROFILE, emissionsProfile.map(BeamVehicleUtils.buildEmissionsString).getOrElse("")) - filledAttrs.set(attr) - attr - } + attr.put(ATTRIBUTE_START_COORDINATE_X, startX.toString) + attr.put(ATTRIBUTE_START_COORDINATE_Y, startY.toString) + attr.put(ATTRIBUTE_END_COORDINATE_X, endX.toString) + attr.put(ATTRIBUTE_END_COORDINATE_Y, endY.toString) + attr.put(ATTRIBUTE_END_LEG_PRIMARY_FUEL_LEVEL, endLegPrimaryFuelLevel.toString) + attr.put(ATTRIBUTE_END_LEG_SECONDARY_FUEL_LEVEL, endLegSecondaryFuelLevel.toString) + attr.put(ATTRIBUTE_SEATING_CAPACITY, seatingCapacity.toString) + attr.put(ATTRIBUTE_TOLL_PAID, amountPaid.toString) + attr.put(ATTRIBUTE_FROM_STOP_INDEX, fromStopIndex.map(_.toString).getOrElse("")) + attr.put(ATTRIBUTE_TO_STOP_INDEX, toStopIndex.map(_.toString).getOrElse("")) + attr.put(ATTRIBUTE_CURRENT_TRIP_MODE, currentTripMode.getOrElse("")) + 
attr.put(ATTRIBUTE_PAYLOAD_IDS, payloadIds.mkString(",")) + attr.put(ATTRIBUTE_WEIGHT, weight.toString) + attr.put(ATTRIBUTE_RIDERS, ridersToStr(riders)) + attr.put(EMISSIONS_PROFILE, emissionsProfile.map(BeamVehicleUtils.buildEmissionsString).getOrElse("")) + attr } } @@ -146,15 +139,15 @@ object PathTraversalEvent { numPass: Int, beamLeg: BeamLeg, currentTripMode: Option[String], - primaryFuelConsumed: Double, - secondaryFuelConsumed: Double, - endLegPrimaryFuelLevel: Double, - endLegSecondaryFuelLevel: Double, - amountPaid: Double, - payloadIds: IndexedSeq[Id[PayloadPlan]], - weight: Double, + primaryFuelConsumed: Float, + secondaryFuelConsumed: Float, + endLegPrimaryFuelLevel: Float, + endLegSecondaryFuelLevel: Float, + amountPaid: Float, + payloadIds: Array[Id[PayloadPlan]], + weight: Float, emissionsProfile: Option[EmissionsProfile], - riders: IndexedSeq[Id[Person]] + riders: Array[Id[Person]] ): PathTraversalEvent = { new PathTraversalEvent( time = time, @@ -171,11 +164,11 @@ object PathTraversalEvent { mode = beamLeg.mode, legLength = beamLeg.travelPath.distanceInM, linkIds = beamLeg.travelPath.linkIds, - linkTravelTime = beamLeg.travelPath.linkTravelTime, - startX = beamLeg.travelPath.startPoint.loc.getX, - startY = beamLeg.travelPath.startPoint.loc.getY, - endX = beamLeg.travelPath.endPoint.loc.getX, - endY = beamLeg.travelPath.endPoint.loc.getY, + linkTravelTime = beamLeg.travelPath.linkTravelTime.map(_.toFloat), + startX = beamLeg.travelPath.startPoint.loc.getX.toFloat, + startY = beamLeg.travelPath.startPoint.loc.getY.toFloat, + endX = beamLeg.travelPath.endPoint.loc.getX.toFloat, + endY = beamLeg.travelPath.endPoint.loc.getY.toFloat, primaryFuelConsumed = primaryFuelConsumed, secondaryFuelConsumed = secondaryFuelConsumed, endLegPrimaryFuelLevel = endLegPrimaryFuelLevel, @@ -209,23 +202,23 @@ object PathTraversalEvent { val mode: BeamMode = BeamMode.fromString(attr(ATTRIBUTE_MODE)).get val legLength: Double = attr(ATTRIBUTE_LENGTH).toDouble val 
linkIdsAsStr = Option(attr(ATTRIBUTE_LINK_IDS)).getOrElse("") - val linkIds: IndexedSeq[Int] = if (linkIdsAsStr == "") IndexedSeq.empty else linkIdsAsStr.split(",").map(_.toInt) + val linkIds: Array[Int] = if (linkIdsAsStr == "") Array.empty else linkIdsAsStr.split(",").map(_.toInt) val linkTravelTimeStr = attr.getOrElse(ATTRIBUTE_LINK_TRAVEL_TIME, "") - val linkTravelTime: IndexedSeq[Double] = - if (linkTravelTimeStr == null || linkTravelTimeStr == "") IndexedSeq.empty - else linkTravelTimeStr.split(",").map(_.toDouble) - val startX: Double = attr(ATTRIBUTE_START_COORDINATE_X).toDouble - val startY: Double = attr(ATTRIBUTE_START_COORDINATE_Y).toDouble - val endX: Double = attr(ATTRIBUTE_END_COORDINATE_X).toDouble - val endY: Double = attr(ATTRIBUTE_END_COORDINATE_Y).toDouble - val primaryFuelConsumed: Double = attr(ATTRIBUTE_PRIMARY_FUEL).toDouble - val secondaryFuelConsumed: Double = attr(ATTRIBUTE_SECONDARY_FUEL).toDouble - val endLegPrimaryFuelLevel: Double = attr(ATTRIBUTE_END_LEG_PRIMARY_FUEL_LEVEL).toDouble - val endLegSecondaryFuelLevel: Double = attr(ATTRIBUTE_END_LEG_SECONDARY_FUEL_LEVEL).toDouble - val amountPaid: Double = attr(ATTRIBUTE_TOLL_PAID).toDouble - val payloadIds: IndexedSeq[Id[PayloadPlan]] = payloadsFromStr(attr.getOrElse(ATTRIBUTE_PAYLOAD_IDS, "")) - val weight: Double = attr.get(ATTRIBUTE_WEIGHT).fold(0.0)(_.toDouble) - val riders: IndexedSeq[Id[Person]] = ridersFromStr(attr.getOrElse(ATTRIBUTE_RIDERS, "")) + val linkTravelTime: Array[Float] = + if (linkTravelTimeStr == null || linkTravelTimeStr == "") Array.empty + else linkTravelTimeStr.split(",").map(_.toFloat) + val startX: Float = attr(ATTRIBUTE_START_COORDINATE_X).toFloat + val startY: Float = attr(ATTRIBUTE_START_COORDINATE_Y).toFloat + val endX: Float = attr(ATTRIBUTE_END_COORDINATE_X).toFloat + val endY: Float = attr(ATTRIBUTE_END_COORDINATE_Y).toFloat + val primaryFuelConsumed: Float = attr(ATTRIBUTE_PRIMARY_FUEL).toFloat + val secondaryFuelConsumed: Float = 
attr(ATTRIBUTE_SECONDARY_FUEL).toFloat + val endLegPrimaryFuelLevel: Float = attr(ATTRIBUTE_END_LEG_PRIMARY_FUEL_LEVEL).toFloat + val endLegSecondaryFuelLevel: Float = attr(ATTRIBUTE_END_LEG_SECONDARY_FUEL_LEVEL).toFloat + val amountPaid: Float = attr(ATTRIBUTE_TOLL_PAID).toFloat + val payloadIds: Array[Id[PayloadPlan]] = payloadsFromStr(attr.getOrElse(ATTRIBUTE_PAYLOAD_IDS, "")) + val weight: Float = attr.get(ATTRIBUTE_WEIGHT).fold(0.0f)(_.toFloat) + val riders: Array[Id[Person]] = ridersFromStr(attr.getOrElse(ATTRIBUTE_RIDERS, "")) val fromStopIndex: Option[Int] = attr.get(ATTRIBUTE_FROM_STOP_INDEX).flatMap(Option(_)).flatMap(x => if (x == "") None else Some(x.toInt)) val toStopIndex: Option[Int] = @@ -268,20 +261,20 @@ object PathTraversalEvent { ) } - private def ridersFromStr(ridersStr: String): IndexedSeq[Id[Person]] = { + private def ridersFromStr(ridersStr: String): Array[Id[Person]] = { if (ridersStr.isEmpty) { - Vector() + Array() } else { - ridersStr.split(":").toIndexedSeq.map(Id.create(_, classOf[Person])) + ridersStr.split(":").map(Id.create(_, classOf[Person])) } } - private def payloadsFromStr(str: String): IndexedSeq[Id[PayloadPlan]] = { - if (str.isEmpty) IndexedSeq.empty + private def payloadsFromStr(str: String): Array[Id[PayloadPlan]] = { + if (str.isEmpty) Array.empty else str.split(',').map(_.createId[PayloadPlan]) } - private def ridersToStr(riders: IndexedSeq[Id[Person]]): String = { + private def ridersToStr(riders: Array[Id[Person]]): String = { riders.mkString(":") } } diff --git a/src/main/scala/beam/agentsim/events/handling/BeamEventsWriterParquet.scala b/src/main/scala/beam/agentsim/events/handling/BeamEventsWriterParquet.scala index 476a353eaf7..1500c147246 100644 --- a/src/main/scala/beam/agentsim/events/handling/BeamEventsWriterParquet.scala +++ b/src/main/scala/beam/agentsim/events/handling/BeamEventsWriterParquet.scala @@ -97,7 +97,13 @@ class BeamEventsWriterParquet( } override def closeFile(): Unit = { - parquetWriter.close() + 
if (parquetWriter != null) { + try { + parquetWriter.close() + } catch { + case e: Exception => logger.error("Error closing ParquetWriter", e) + } + } } def toGenericDataRecord(event: Event, columnNames: Seq[String]): GenericData.Record = { diff --git a/src/main/scala/beam/agentsim/infrastructure/ChargingFunctions.scala b/src/main/scala/beam/agentsim/infrastructure/ChargingFunctions.scala index 506bd4896bd..afc5fe6b321 100644 --- a/src/main/scala/beam/agentsim/infrastructure/ChargingFunctions.scala +++ b/src/main/scala/beam/agentsim/infrastructure/ChargingFunctions.scala @@ -2,7 +2,7 @@ package beam.agentsim.infrastructure import beam.agentsim.agents.vehicles.FuelType.FuelType import beam.agentsim.agents.vehicles.{BeamVehicleType, VehicleManager} -import beam.agentsim.infrastructure.ParkingInquiry.ParkingActivityType.{Charge, EnRoute, Home, Work} +import beam.agentsim.infrastructure.ParkingInquiry.ParkingActivityType._ import beam.agentsim.infrastructure.ParkingInquiry.ParkingSearchMode import beam.agentsim.infrastructure.charging.ChargingPointType import beam.agentsim.infrastructure.parking.ParkingZoneSearch.{ParkingAlternative, ParkingZoneSearchResult} @@ -121,8 +121,7 @@ class ChargingFunctions( val verifyCharger = inquiry.beamVehicle.isDefined && inquiry.beamVehicle.get.beamVehicleType.chargingCapability.isDefined && ( inquiry.searchMode == ParkingSearchMode.EnRouteCharging || - inquiry.parkingActivityType == Charge || - inquiry.parkingActivityType == EnRoute + inquiry.parkingActivityType == Charge ) if (!verifyCharger) { return true @@ -299,10 +298,12 @@ class ChargingFunctions( import ParkingSearchMode._ if (parkingConfig.forceParkingType && !List(EnRouteCharging, Init).contains(inquiry.searchMode)) { inquiry.parkingActivityType match { - case Home => Set(ParkingType.Residential) - case Work => Set(ParkingType.Workplace) - case Charge => Set(ParkingType.Workplace, ParkingType.Public, ParkingType.Residential) - case _ => Set(ParkingType.Public) + case Home => 
Set(ParkingType.Residential) + case Work => Set(ParkingType.Workplace) + case Charge => Set(ParkingType.Public) + case Commercial => Set(ParkingType.Commercial) + case Depot => Set(ParkingType.Depot) + case _ => Set(ParkingType.Public) } } else super[ParkingFunctions].getPreferredParkingTypes(inquiry) } diff --git a/src/main/scala/beam/agentsim/infrastructure/HierarchicalParkingManager.scala b/src/main/scala/beam/agentsim/infrastructure/HierarchicalParkingManager.scala index 6afdf1897b1..13a863887dd 100644 --- a/src/main/scala/beam/agentsim/infrastructure/HierarchicalParkingManager.scala +++ b/src/main/scala/beam/agentsim/infrastructure/HierarchicalParkingManager.scala @@ -5,10 +5,8 @@ import beam.agentsim.agents.vehicles.VehicleCategory.VehicleCategory import beam.agentsim.agents.vehicles.VehicleManager.ReservedFor import beam.agentsim.infrastructure.HierarchicalParkingManager._ import beam.agentsim.infrastructure.charging.ChargingPointType -import beam.agentsim.infrastructure.parking.ParkingZone.UbiqiutousParkingAvailability import beam.agentsim.infrastructure.parking._ import beam.agentsim.infrastructure.taz.{TAZ, TAZTreeMap} -import beam.router.BeamRouter.Location import beam.sim.common.GeoUtils import beam.sim.config.BeamConfig import beam.utils.matsim_conversion.ShapeUtils @@ -66,17 +64,10 @@ class HierarchicalParkingManager( ) ) - val DefaultParkingZone: ParkingZone = - ParkingZone.defaultInit( - TAZ.DefaultTAZId, - ParkingType.Public, - UbiqiutousParkingAvailability - ) - /** * For each TAZ it contains a Map: ParkingZoneDescription -> ParkingZoneTreeMap */ - protected val tazSearchMap: Map[Id[TAZ], Map[ParkingZoneDescription, QuadTree[ParkingZone]]] = + private val tazSearchMap: Map[Id[TAZ], Map[ParkingZoneDescription, QuadTree[ParkingZone]]] = createDescriptionToZonesMapForEachTaz(parkingZones, tazMap.idToTAZMapping) if (checkThatNumberOfStallsMatch) { @@ -103,7 +94,7 @@ class HierarchicalParkingManager( searchFunctions.get.searchForParkingStall(inquiry) 
val (parkingStall: ParkingStall, parkingZone: ParkingZone) = - if (TAZ.isSpecialTazId(tazParkingStall.tazId)) tazParkingStall -> DefaultParkingZone + if (TAZ.isSpecialTazId(tazParkingStall.tazId)) tazParkingStall -> ParkingZone.DefaultParkingZone else { val descriptionToZone = tazSearchMap(tazParkingZone.tazId) findAppropriateLinkParkingZoneWithinTaz(tazParkingZone, descriptionToZone, inquiry.destinationUtm.loc) match { @@ -118,7 +109,8 @@ class HierarchicalParkingManager( "Cannot find link parking parking zone for taz zone {}. Parallel changing of stallsAvailable?", tazParkingZone ) - lastResortStallAndZone(inquiry.destinationUtm.loc) + val (newStall, _) = ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(seed)) + newStall } } @@ -171,7 +163,7 @@ class HierarchicalParkingManager( */ override def processReleaseParkingStall(release: ReleaseParkingStall): Boolean = { val parkingZoneId = release.stall.parkingZoneId - if (parkingZoneId == ParkingZone.DefaultParkingZoneId) { + if (parkingZoneId == ParkingZone.DefaultParkingZone.parkingZoneId) { // this is an infinitely available resource; no update required logger.debug("Releasing a stall in the default/emergency zone") true @@ -238,7 +230,7 @@ object HierarchicalParkingManager { timeRestrictions: Map[VehicleCategory, Range] ) - object ParkingZoneDescription { + private object ParkingZoneDescription { def describeParkingZone(zone: ParkingZone): ParkingZoneDescription = { new ParkingZoneDescription( diff --git a/src/main/scala/beam/agentsim/infrastructure/InfrastructureFunctions.scala b/src/main/scala/beam/agentsim/infrastructure/InfrastructureFunctions.scala index 4c46d8edb25..5cb6f2c7587 100644 --- a/src/main/scala/beam/agentsim/infrastructure/InfrastructureFunctions.scala +++ b/src/main/scala/beam/agentsim/infrastructure/InfrastructureFunctions.scala @@ -3,7 +3,6 @@ package beam.agentsim.infrastructure import beam.agentsim.agents.choice.logit.UtilityFunctionOperation import 
beam.agentsim.infrastructure.ParkingInquiry.ParkingActivityType import beam.agentsim.infrastructure.charging.ChargingPointType -import beam.agentsim.infrastructure.parking.ParkingZone.UbiqiutousParkingAvailability import beam.agentsim.infrastructure.parking.ParkingZoneSearch._ import beam.agentsim.infrastructure.parking._ import beam.agentsim.infrastructure.taz.{TAZ, TAZTreeMap} @@ -80,14 +79,7 @@ abstract class InfrastructureFunctions( import InfrastructureFunctions._ - val DefaultParkingZone: ParkingZone = - ParkingZone.defaultInit( - TAZ.DefaultTAZId, - ParkingType.Public, - UbiqiutousParkingAvailability - ) - - val parkingZoneSearchConfiguration: ParkingZoneSearchConfiguration = + private val parkingZoneSearchConfiguration: ParkingZoneSearchConfiguration = ParkingZoneSearchConfiguration( minSearchRadius, maxSearchRadius, @@ -245,12 +237,11 @@ abstract class InfrastructureFunctions( zoneCollections.get(parkingZone.tazId).foreach(_.releaseZone(parkingZone)) result } - } object InfrastructureFunctions { - def chargingTypeToNo( + private def chargingTypeToNo( parkingZonesSampled: List[(Id[ParkingZoneId], Option[ChargingPointType], ParkingType, Double)] ): String = { parkingZonesSampled @@ -266,7 +257,7 @@ object InfrastructureFunctions { .mkString(", ") } - def parkingTypeToNo( + private def parkingTypeToNo( parkingZonesSampled: List[(Id[ParkingZoneId], Option[ChargingPointType], ParkingType, Double)] ): String = { parkingZonesSampled @@ -277,7 +268,7 @@ object InfrastructureFunctions { .mkString(", ") } - def listOfCosts( + private def listOfCosts( parkingZonesSampled: List[(Id[ParkingZoneId], Option[ChargingPointType], ParkingType, Double)] ): String = { parkingZonesSampled diff --git a/src/main/scala/beam/agentsim/infrastructure/InfrastructureUtils.scala b/src/main/scala/beam/agentsim/infrastructure/InfrastructureUtils.scala index 589fc8e3f2c..563319926a6 100644 --- a/src/main/scala/beam/agentsim/infrastructure/InfrastructureUtils.scala +++ 
b/src/main/scala/beam/agentsim/infrastructure/InfrastructureUtils.scala @@ -58,7 +58,10 @@ object InfrastructureUtils extends LazyLogging { ( beamConfig.beam.agentsim.agents.freight.carrierParkingFilePath.getOrElse(""), VehicleManager - .createOrGetReservedFor(beamConfig.beam.agentsim.agents.freight.name, VehicleManager.TypeEnum.Freight), + .createOrGetReservedFor( + beamConfig.beam.agentsim.agents.freight.name, + Some(VehicleManager.TypeEnum.Freight) + ), Seq(ParkingType.Workplace) ) ) @@ -66,7 +69,7 @@ object InfrastructureUtils extends LazyLogging { val ridehailParkingFiles = beamConfig.beam.agentsim.agents.rideHail.managers.map(managerConfig => ( managerConfig.initialization.parking.filePath, - VehicleManager.createOrGetReservedFor(managerConfig.name, VehicleManager.TypeEnum.RideHail), + VehicleManager.createOrGetReservedFor(managerConfig.name, Some(VehicleManager.TypeEnum.RideHail)), Seq(ParkingType.Workplace) ) ) diff --git a/src/main/scala/beam/agentsim/infrastructure/ParallelParkingManager.scala b/src/main/scala/beam/agentsim/infrastructure/ParallelParkingManager.scala index 8639b77d70a..e31ae376b89 100644 --- a/src/main/scala/beam/agentsim/infrastructure/ParallelParkingManager.scala +++ b/src/main/scala/beam/agentsim/infrastructure/ParallelParkingManager.scala @@ -61,7 +61,7 @@ class ParallelParkingManager( ) ) - protected val tazToWorker: Map[Id[_], Worker] = + private val tazToWorker: Map[Id[_], Worker] = mapTazToWorker(workers) + (TAZ.EmergencyTAZId -> emergencyWorker) + (TAZ.DefaultTAZId -> emergencyWorker) protected def createWorker(cluster: ParkingCluster): Worker = { diff --git a/src/main/scala/beam/agentsim/infrastructure/ParkingFunctions.scala b/src/main/scala/beam/agentsim/infrastructure/ParkingFunctions.scala index 613bc503e78..57e3b2684fc 100644 --- a/src/main/scala/beam/agentsim/infrastructure/ParkingFunctions.scala +++ b/src/main/scala/beam/agentsim/infrastructure/ParkingFunctions.scala @@ -10,6 +10,8 @@ import 
beam.agentsim.infrastructure.taz.{TAZ, TAZTreeMap} import beam.sim.config.BeamConfig.Beam.Agentsim.Agents.Parking import org.locationtech.jts.geom.Envelope import org.matsim.api.core.v01.{Coord, Id} +import org.matsim.core.utils.collections.QuadTree +import beam.agentsim.infrastructure.ParkingInquiry.ParkingActivityType._ import scala.util.Random @@ -109,6 +111,7 @@ class ParkingFunctions( ): Boolean = { if (zone.chargingPointType.isDefined) throw new RuntimeException("ParkingFunctions expect only stalls without charging points") + val preferredParkingTypes = getPreferredParkingTypes(inquiry) val canCarParkHere: Boolean = canThisCarParkHere(zone, inquiry, preferredParkingTypes) canCarParkHere @@ -133,21 +136,20 @@ class ParkingFunctions( ) ParkingZoneSearch.ParkingZoneSearchResult(newStall, DefaultParkingZone) case _ => - inquiry.parkingActivityType match { - case ParkingActivityType.Home if inquiry.searchMode != ParkingSearchMode.EnRouteCharging => - val newStall = ParkingStall.defaultResidentialStall(inquiry.destinationUtm.loc, inquiry.activityType) - ParkingZoneSearch.ParkingZoneSearchResult(newStall, DefaultParkingZone) + val destinationLocation = inquiry.destinationUtm.loc + val taz = tazTreeMap.getTAZ(destinationLocation) + val (newStall, zone) = inquiry.parkingActivityType match { + case ParkingActivityType.Home => + ParkingStall.defaultStallAtLocation(destinationLocation, taz.tazId, ParkingType.Residential) + case ParkingActivityType.Depot => + ParkingStall.defaultStallAtLocation(destinationLocation, taz.tazId, ParkingType.Depot) + case ParkingActivityType.Commercial => + ParkingStall.obstructiveStallAtLocation(destinationLocation, taz.tazId, ParkingType.Commercial) case _ => // didn't find any stalls, so, as a last resort, create a very expensive stall - val boxAroundRequest = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) - 
val newStall = ParkingStall.lastResortStall(boxAroundRequest, new Random(seed)) - ParkingZoneSearch.ParkingZoneSearchResult(newStall, DefaultParkingZone) + ParkingStall.lastResortStall(destinationLocation, new Random(seed)) } + ParkingZoneSearch.ParkingZoneSearchResult(newStall, zone) } Some(output) } @@ -218,7 +220,12 @@ class ParkingFunctions( .forall(_.contains(inquiry.destinationUtm.time % (24 * 3600))) ) - validParkingType && isValidTime + val isValidManager = + inquiry.beamVehicle.forall { vehicle => + zone.reservedFor == VehicleManager.AnyManager || vehicle.vehicleManagerId.get() == zone.reservedFor.managerId + } + + validParkingType && isValidTime && isValidManager } /** @@ -230,19 +237,28 @@ class ParkingFunctions( protected def getPreferredParkingTypes(inquiry: ParkingInquiry): Set[ParkingType] = { // a lookup for valid parking types based on this inquiry if (inquiry.searchMode == ParkingSearchMode.EnRouteCharging) { - Set(ParkingType.Public) + inquiry.parkingActivityType match { + case Commercial => Set(ParkingType.Commercial, ParkingType.Depot) + case Depot => Set(ParkingType.Commercial, ParkingType.Depot) + case _ => Set(ParkingType.Public, ParkingType.Depot) + } } else if (inquiry.searchMode == ParkingSearchMode.Init) { inquiry.parkingActivityType match { - case ParkingActivityType.Home => Set(ParkingType.Residential) - case ParkingActivityType.Work => Set(ParkingType.Workplace) - case _ => Set(ParkingType.Public) + case Home => Set(ParkingType.Residential) + case Work => Set(ParkingType.Workplace) + case Depot => Set(ParkingType.Depot) + case ParkingActivityType.Freight => Set(ParkingType.Depot) + case_ => Set(ParkingType.Public) } } else { inquiry.parkingActivityType match { - case ParkingActivityType.Home => Set(ParkingType.Residential, ParkingType.Public) - case ParkingActivityType.Work => Set(ParkingType.Workplace, ParkingType.Public) - case ParkingActivityType.Charge => Set(ParkingType.Workplace, ParkingType.Public, ParkingType.Residential) - 
case _ => Set(ParkingType.Public) + case Home => Set(ParkingType.Residential, ParkingType.Public) + case Work => Set(ParkingType.Workplace, ParkingType.Public) + case Charge => Set(ParkingType.Public) + case Commercial => Set(ParkingType.Commercial) + case Depot => Set(ParkingType.Depot) + case ParkingActivityType.Freight => Set(ParkingType.Commercial, ParkingType.Depot) + case_ => Set(ParkingType.Public) } } } diff --git a/src/main/scala/beam/agentsim/infrastructure/ParkingInquiry.scala b/src/main/scala/beam/agentsim/infrastructure/ParkingInquiry.scala index 969154fda97..f9b7b93a4b3 100644 --- a/src/main/scala/beam/agentsim/infrastructure/ParkingInquiry.scala +++ b/src/main/scala/beam/agentsim/infrastructure/ParkingInquiry.scala @@ -42,6 +42,7 @@ case class ParkingInquiry( originUtm: Option[SpaceTime] = None, triggerId: Long ) extends HasTriggerId { + val parkingActivityType: ParkingActivityType = activityTypeStringToEnum(activityType) val departureLocation: Option[Coord] = searchMode match { @@ -69,23 +70,55 @@ object ParkingInquiry extends LazyLogging { case object Wherever extends ParkingActivityType case object Home extends ParkingActivityType case object Work extends ParkingActivityType - case object EnRoute extends ParkingActivityType + case object Commercial extends ParkingActivityType + case object Depot extends ParkingActivityType case object IDLE extends ParkingActivityType + case object Freight extends ParkingActivityType } + // Pre-compiled lookup table for exact matches (O(1) lookup) + private val exactMatches = Map( + "home" -> ParkingActivityType.Home, + "work" -> ParkingActivityType.Work, + "charge" -> ParkingActivityType.Charge, + "wherever" -> ParkingActivityType.Wherever, + "eatout" -> ParkingActivityType.Wherever, + "othdiscr" -> ParkingActivityType.Wherever, + "othmaint" -> ParkingActivityType.Wherever, + "school" -> ParkingActivityType.Wherever, + "escort" -> ParkingActivityType.Wherever, + "social" -> ParkingActivityType.Wherever, + "idle" 
-> ParkingActivityType.IDLE, + "depot" -> ParkingActivityType.Freight, + "commercial" -> ParkingActivityType.Freight, + "loading" -> ParkingActivityType.Freight, + "unloading" -> ParkingActivityType.Freight, + "warehouse" -> ParkingActivityType.Freight + ) + + // Pre-compiled prefix patterns for startsWith checks + private val freightPrefixes = Set("depot", "commercial", "loading", "unloading", "warehouse") + def activityTypeStringToEnum(activityType: String): ParkingActivityType = { - activityType.toLowerCase match { - case "home" => ParkingActivityType.Home - case "work" => ParkingActivityType.Work - case "charge" => ParkingActivityType.Charge - case "wherever" => ParkingActivityType.Wherever - case "idle" => ParkingActivityType.IDLE - case otherType if otherType.contains("enroute") => ParkingActivityType.Charge - case otherType if otherType.contains("home") => ParkingActivityType.Home - case otherType if otherType.contains("work") => ParkingActivityType.Work - case otherType => - logger.debug(s"This Parking Activity Type ($otherType) has not been defined") - ParkingActivityType.Wherever + val lowerType = activityType.toLowerCase + + // Try exact match first (fastest - O(1)) + exactMatches.get(lowerType) match { + case Some(result) => result + case None => + // Check prefixes (only if exact match failed) + if (freightPrefixes.exists(lowerType.startsWith)) { + ParkingActivityType.Freight + } else if (lowerType.contains("enroute")) { + ParkingActivityType.Charge + } else if (lowerType.contains("home")) { + ParkingActivityType.Home + } else if (lowerType.contains("work")) { + ParkingActivityType.Work + } else { + logger.debug(s"This Parking Activity Type ($lowerType) has not been defined") + ParkingActivityType.Wherever + } } } diff --git a/src/main/scala/beam/agentsim/infrastructure/ParkingStall.scala b/src/main/scala/beam/agentsim/infrastructure/ParkingStall.scala index bbf441038dd..1d2662e8d94 100755 --- 
a/src/main/scala/beam/agentsim/infrastructure/ParkingStall.scala +++ b/src/main/scala/beam/agentsim/infrastructure/ParkingStall.scala @@ -62,98 +62,93 @@ object ParkingStall { * @param coord the location for the stall * @return a new parking stall with the default Id[Taz] and parkingZoneId */ - def defaultStall(coord: Coord): ParkingStall = ParkingStall( - tazId = TAZ.DefaultTAZId, - parkingZoneId = ParkingZone.DefaultParkingZoneId, - locationUTM = coord, - costInDollars = 0.0, - chargingPointType = None, - pricingModel = None, - parkingType = ParkingType.Public, - activityType = "default", + def defaultStall(coord: Coord): (ParkingStall, ParkingZone) = { + val newStall = ParkingStall( + tazId = TAZ.DefaultTAZId, + parkingZoneId = ParkingZone.DefaultParkingZone.parkingZoneId, + locationUTM = coord, + costInDollars = 0.0, + chargingPointType = None, + pricingModel = None, + parkingType = ParkingType.Public, + activityType = "default", reservedFor = VehicleManager.AnyManager - ) + )(newStall, ParkingZone.DefaultParkingZone) + } /** * take a stall from the infinite parking zone, with a random location by default from planet-wide UTM values * - * @param random random number generator - * @param boundingBox bounding box + * @param generateRandomLocationUsingThis random number generator * @param costInDollars the cost of this stall * @return a stall that costs a lot but at least it exists. it's coordinate can be anywhere on the planet. for routing, the nearest link should be found using Beam Geotools. 
*/ def lastResortStall( - boundingBox: Envelope, - random: Random = Random, + location: Location, + random: Random, costInDollars: Double = 50.0 - ): ParkingStall = { + ): (ParkingStall, ParkingZone) = { + val boundingBox = new Envelope( + location.getX + 2000, + location.getX - 2000, + location.getY + 2000, + location.getY - 2000 + ) val x = random.nextDouble() * (boundingBox.getMaxX - boundingBox.getMinX) + boundingBox.getMinX val y = random.nextDouble() * (boundingBox.getMaxY - boundingBox.getMinY) + boundingBox.getMinY - + val stallLocation = new Coord(x, y) ParkingStall( tazId = TAZ.EmergencyTAZId, - parkingZoneId = ParkingZone.DefaultParkingZoneId, - locationUTM = new Coord(x, y), + parkingZoneId = ParkingZone.DefaultParkingZone.parkingZoneId, + locationUTM = stallLocation, costInDollars = costInDollars, chargingPointType = None, - pricingModel = Some { - PricingModel.FlatFee(costInDollars.toInt) - }, + pricingModel = Some { PricingModel.FlatFee(costInDollars.toInt) }, parkingType = ParkingType.Public, activityType = "emergency", reservedFor = VehicleManager.AnyManager - ) + ) -> ParkingZone.DefaultParkingZone } - //#Art - - /** - * take a stall from the infinite parking zone, with a location at the request (e.g. traveler's home location). - * This should only kick in when all other (potentially non-free, non-colocated) stalls in the search area are - * exhausted - * - * @param locationUTM request location (home) - * @return a stall that is free and located at the person's home. 
- */ - def defaultResidentialStall(locationUTM: Location, activity: String): ParkingStall = ParkingStall( - tazId = TAZ.DefaultTAZId, - parkingZoneId = ParkingZone.DefaultParkingZoneId, - locationUTM = locationUTM, - costInDollars = 0.0, - chargingPointType = None, - pricingModel = Some(PricingModel.FlatFee(0)), - parkingType = ParkingType.Residential, - activityType = activity, - reservedFor = VehicleManager.AnyManager - ) + def obstructiveStallAtLocation( + location: Location, + tazId: Id[TAZ], + parkingType: ParkingType, + costInDollars: Double = CostOfEmergencyStallInDollars + ): (ParkingStall, ParkingZone) = { + ParkingStall( + tazId = tazId, + parkingZoneId = ParkingZone.ObstructiveParkingZone.parkingZoneId, + locationUTM = location, + costInDollars = costInDollars, + chargingPointType = None, + pricingModel = Some { PricingModel.FlatFee(costInDollars.toInt) }, + parkingType = parkingType, + reservedFor = VehicleManager.AnyManager + ) -> ParkingZone.ObstructiveParkingZone + } - def doubleParkingStall(tazId: Id[TAZ], locationUTM: Location, activity: String): ParkingStall = ParkingStall( - tazId = tazId, - parkingZoneId = ParkingZone.DefaultParkingZoneId, - locationUTM = locationUTM, - costInDollars = 0.0, - chargingPointType = None, - pricingModel = Some(PricingModel.FlatFee(0)), - parkingType = ParkingType.DoubleParking, - activityType = activity, - reservedFor = VehicleManager.AnyManager - ) + def defaultStallAtLocation( + location: Location, + tazId: Id[TAZ], + parkingType: ParkingType, + costInDollars: Double = CostOfEmergencyStallInDollars + ): (ParkingStall, ParkingZone) = { + ParkingStall( + tazId = tazId, + parkingZoneId = ParkingZone.DefaultParkingZone.parkingZoneId, + locationUTM = location, + costInDollars = costInDollars, + chargingPointType = None, + pricingModel = Some { + PricingModel.FlatFee(costInDollars.toInt) + }, + parkingType = parkingType, + reservedFor = VehicleManager.AnyManager + ) -> ParkingZone.DefaultParkingZone + } - /** - * @param 
locationUTM Location - * @return - */ - def defaultFastChargingStall(locationUTM: Location): ParkingStall = ParkingStall( - tazId = TAZ.DefaultTAZId, - parkingZoneId = ParkingZone.DefaultParkingZoneId, - locationUTM = locationUTM, - costInDollars = 0.0, - chargingPointType = Some(ChargingPointType.ChargingStationCcsComboType2), - pricingModel = Some(PricingModel.FlatFee(0)), - parkingType = ParkingType.Public, - activityType = "charging", - reservedFor = VehicleManager.AnyManager - ) + //#Art /** * Convenience method to convert a [[ParkingAlternative]] to a [[ParkingStall]] @@ -178,5 +173,4 @@ object ParkingStall { parkingAlternative.parkingZone.reservedFor ) } - } diff --git a/src/main/scala/beam/agentsim/infrastructure/RideHailDepotFunctions.scala b/src/main/scala/beam/agentsim/infrastructure/RideHailDepotFunctions.scala index 01ab0f951f4..c5338a00ed4 100644 --- a/src/main/scala/beam/agentsim/infrastructure/RideHailDepotFunctions.scala +++ b/src/main/scala/beam/agentsim/infrastructure/RideHailDepotFunctions.scala @@ -144,14 +144,8 @@ class RideHailDepotFunctions( result.copy(parkingStall = updatedParkingStall) case _ => // didn't find any stalls, so, as a last resort, create a very expensive stall - val boxAroundRequest = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) - val newStall = ParkingStall.lastResortStall(boxAroundRequest, new Random(seed)) - ParkingZoneSearch.ParkingZoneSearchResult(newStall, DefaultParkingZone) + val (newStall, defaultZone) = ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(seed)) + ParkingZoneSearch.ParkingZoneSearchResult(newStall, defaultZone) } Some(output) } @@ -182,7 +176,7 @@ class RideHailDepotFunctions( * @param tick Int * @return */ - def secondsToServiceQueueAndChargingVehicles( + private def secondsToServiceQueueAndChargingVehicles( parkingZone: ParkingZone, tick: Int ): Int 
= { @@ -219,7 +213,7 @@ class RideHailDepotFunctions( * @param parkingZoneId ID of the parking zone * @return Parking zone location in UTM. */ - def getParkingZoneLocationUtm(parkingZoneId: Id[ParkingZoneId]): Coord = { + private def getParkingZoneLocationUtm(parkingZoneId: Id[ParkingZoneId]): Coord = { val parkingZone = parkingZones(parkingZoneId) parkingZone.link.fold { tazTreeMap.idToTAZMapping(parkingZone.tazId).coord diff --git a/src/main/scala/beam/agentsim/infrastructure/RideHailDepotNetwork.scala b/src/main/scala/beam/agentsim/infrastructure/RideHailDepotNetwork.scala index 9fa505ca79e..b41779cc44a 100644 --- a/src/main/scala/beam/agentsim/infrastructure/RideHailDepotNetwork.scala +++ b/src/main/scala/beam/agentsim/infrastructure/RideHailDepotNetwork.scala @@ -1,11 +1,10 @@ package beam.agentsim.infrastructure import beam.agentsim.infrastructure.parking.{ParkingZone, ParkingZoneId} -import beam.agentsim.infrastructure.taz.{TAZ, TAZTreeMap} +import beam.agentsim.infrastructure.taz.TAZTreeMap import beam.sim.BeamServices import org.locationtech.jts.geom.Envelope import org.matsim.api.core.v01.Id -import org.matsim.core.utils.collections.QuadTree class RideHailDepotNetwork(override val parkingZones: Map[Id[ParkingZoneId], ParkingZone]) extends ChargingNetwork(parkingZones) { @@ -18,10 +17,10 @@ object RideHailDepotNetwork { // a ride hail agent is searching for a charging depot and is not in service of an activity. // for this reason, a higher max radius is reasonable. 
- val SearchStartRadius: Double = 40000.0 // meters - val SearchMaxRadius: Int = 80465 // 50 miles, in meters - val FractionOfSameTypeZones: Double = 0.2 // 20% - val MinNumberOfSameTypeZones: Int = 5 + private val SearchStartRadius: Double = 40000.0 // meters + private val SearchMaxRadius: Int = 80465 // 50 miles, in meters + private val FractionOfSameTypeZones: Double = 0.2 // 20% + private val MinNumberOfSameTypeZones: Int = 5 def apply( parkingZones: Map[Id[ParkingZoneId], ParkingZone], diff --git a/src/main/scala/beam/agentsim/infrastructure/TrivialParkingManager.scala b/src/main/scala/beam/agentsim/infrastructure/TrivialParkingManager.scala index 9d6c3f27ff3..3bbc012587a 100644 --- a/src/main/scala/beam/agentsim/infrastructure/TrivialParkingManager.scala +++ b/src/main/scala/beam/agentsim/infrastructure/TrivialParkingManager.scala @@ -9,7 +9,7 @@ class TrivialParkingManager extends Actor { private var nextStallNum = 0 override def receive: Receive = { case request: ParkingInquiry => - val stall = ParkingStall.defaultStall(request.destinationUtm.loc) + val stall = ParkingStall.defaultStall(request.destinationUtm.loc)._1 sender ! ParkingInquiryResponse(stall, request.requestId, request.triggerId) nextStallNum += 1 } @@ -20,7 +20,7 @@ class AnotherTrivialParkingManager(location: Coord) extends LoggingMessageActor private var nextStallNum = 0 override def loggedReceive: Receive = { case request: ParkingInquiry => - val stall = ParkingStall.defaultStall(location) + val stall = ParkingStall.defaultStall(location)._1 sender ! 
ParkingInquiryResponse(stall, request.requestId, request.triggerId) nextStallNum += 1 } diff --git a/src/main/scala/beam/agentsim/infrastructure/charging/ChargingPointType.scala b/src/main/scala/beam/agentsim/infrastructure/charging/ChargingPointType.scala index 708c888ec70..f644d020135 100644 --- a/src/main/scala/beam/agentsim/infrastructure/charging/ChargingPointType.scala +++ b/src/main/scala/beam/agentsim/infrastructure/charging/ChargingPointType.scala @@ -71,17 +71,17 @@ object ChargingPointType { // these were breaking some tests with a ChargingPoint parsing error caused by Event handlers def apply(s: String): Option[ChargingPointType] = { s.trim.toLowerCase match { - case "householdsocket" => Some(HouseholdSocket) - case "bluehouseholdsocket" => Some(BlueHouseholdSocket) - case "cee16asocket" => Some(Cee16ASocket) - case "cee32asocket" => Some(Cee32ASocket) - case "cee63asocket" => Some(Cee63ASocket) - case "chargingstationtype1" => Some(ChargingStationType1) - case "chargingstationtype2" => Some(ChargingStationType2) - case "chargingstationccscombotype1" => Some(ChargingStationCcsComboType1) - case "chargingstationccscombotype2" => Some(ChargingStationCcsComboType2) - case "teslasupercharger" => Some(TeslaSuperCharger) - case "nocharger" | "none" | "" => None + case "householdsocket" => Some(HouseholdSocket) + case "bluehouseholdsocket" => Some(BlueHouseholdSocket) + case "cee16asocket" => Some(Cee16ASocket) + case "cee32asocket" => Some(Cee32ASocket) + case "cee63asocket" => Some(Cee63ASocket) + case "chargingstationtype1" | "level1" => Some(ChargingStationType1) + case "chargingstationtype2" | "level2" => Some(ChargingStationType2) + case "chargingstationccscombotype1" => Some(ChargingStationCcsComboType1) + case "chargingstationccscombotype2" => Some(ChargingStationCcsComboType2) + case "teslasupercharger" => Some(TeslaSuperCharger) + case "nocharger" | "none" | "" => None case CustomChargingPointRegex(id, installedCapacity, currentType) => 
Some(CustomChargingPoint(id, installedCapacity, currentType)) case _ => diff --git a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingNetwork.scala b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingNetwork.scala index a43ff3d1ab4..9c588b5a4ed 100644 --- a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingNetwork.scala +++ b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingNetwork.scala @@ -12,8 +12,8 @@ abstract class ParkingNetwork(parkingZones: Map[Id[ParkingZoneId], ParkingZone]) protected val searchFunctions: Option[InfrastructureFunctions] // Core - protected var totalStallsInUse: Long = 0L - protected var totalStallsAvailable: Long = parkingZones.map(_._2.stallsAvailable).sum + private var totalStallsInUse: Long = 0L + private var totalStallsAvailable: Long = parkingZones.map(_._2.stallsAvailable).sum /** * @param inquiry ParkingInquiry @@ -51,7 +51,7 @@ abstract class ParkingNetwork(parkingZones: Map[Id[ParkingZoneId], ParkingZone]) */ def processReleaseParkingStall(release: ReleaseParkingStall): Boolean = { val parkingZoneId = release.stall.parkingZoneId - val released: Boolean = if (parkingZoneId == ParkingZone.DefaultParkingZoneId) { + val released: Boolean = if (parkingZoneId == ParkingZone.DefaultParkingZone.parkingZoneId) { // this is an infinitely available resource; no update required logger.debug("Releasing a stall in the default/emergency zone") true diff --git a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingType.scala b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingType.scala index 36491bae9b4..215a6808597 100644 --- a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingType.scala +++ b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingType.scala @@ -31,11 +31,11 @@ object ParkingType { def apply(s: String): ParkingType = { s match { - case "Residential" => Residential - case "Public" => Public - case "Workplace" => Workplace - case "Commercial" => Commercial - case 
"Depot" => Depot + case "Residential" => Residential + case "Public" => Public + case "Workplace" => Workplace + case "Commercial" => Commercial + case "Depot" => Depot case "DoubleParking" => DoubleParking } } diff --git a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZone.scala b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZone.scala index e8489cab6b0..ab30387b5bc 100644 --- a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZone.scala +++ b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZone.scala @@ -4,7 +4,6 @@ import beam.agentsim.agents.vehicles.VehicleCategory.VehicleCategory import beam.agentsim.agents.vehicles.VehicleManager import beam.agentsim.agents.vehicles.VehicleManager.ReservedFor import beam.agentsim.infrastructure.charging.ChargingPointType -import beam.agentsim.infrastructure.power.SitePowerManager import beam.agentsim.infrastructure.taz.TAZ import com.typesafe.scalalogging.LazyLogging import org.matsim.api.core.v01.Id @@ -34,6 +33,7 @@ class ParkingZone( val pricingModel: Option[PricingModel], val timeRestrictions: Map[VehicleCategory, Range], val link: Option[Link], + val siteId: Id[SitePowerManager], val sitePowerManager: Option[String], val energyStorageCapacityInKWh: Option[Double], val energyStorageSOC: Option[Double] @@ -68,14 +68,48 @@ class ParkingZone( object ParkingZone extends LazyLogging { - val DefaultParkingZoneId: Id[ParkingZoneId] = Id.create("default", classOf[ParkingZoneId]) - // used in place of Int.MaxValue to avoid possible buffer overrun due to async failures // in other words, while stallsAvailable of a ParkingZone should never exceed the numStalls // it started with, it could be possible in the system to happen due to scheduler issues. if // it does, it would be more helpful for it to reflect with a reasonable number, ie., 1000001, // which would tell us that we had 1 extra releaseStall event. 
- val UbiqiutousParkingAvailability: Int = 1000000 + val UbiquitousParkingAvailability: Int = 1000000 + + val DefaultParkingZone: ParkingZone = { + val defaultParkingZoneId: Id[ParkingZoneId] = Id.create("default", classOf[ParkingZoneId]) + init( + Some(defaultParkingZoneId), + TAZ.DefaultTAZId, + ParkingType.Public, + VehicleManager.AnyManager, + Some(SitePowerManager.createId(defaultParkingZoneId.toString)), + UbiquitousParkingAvailability + ) + } + + val ObstructiveParkingZone: ParkingZone = { + val defaultParkingZoneId: Id[ParkingZoneId] = Id.create("obstructive", classOf[ParkingZoneId]) + init( + Some(defaultParkingZoneId), + TAZ.DefaultTAZId, + ParkingType.Public, + VehicleManager.AnyManager, + Some(SitePowerManager.createId(defaultParkingZoneId.toString)), + UbiquitousParkingAvailability + ) + } + + val EmergencyParkingZone: ParkingZone = { + val defaultParkingZoneId: Id[ParkingZoneId] = Id.create("emergency", classOf[ParkingZoneId]) + init( + Some(defaultParkingZoneId), + TAZ.EmergencyTAZId, + ParkingType.Public, + VehicleManager.AnyManager, + Some(SitePowerManager.createId(defaultParkingZoneId.toString)), + UbiquitousParkingAvailability + ) + } /** * creates a new StallValues object @@ -115,20 +149,6 @@ object ParkingZone extends LazyLogging { energyStorageSOC ) - def defaultInit( - geoId: Id[TAZ], - parkingType: ParkingType, - numStalls: Int - ): ParkingZone = { - init( - Some(DefaultParkingZoneId), - geoId, - parkingType, - VehicleManager.AnyManager, - numStalls - ) - } - def init( parkingZoneIdMaybe: Option[Id[ParkingZoneId]], geoId: Id[TAZ], @@ -171,7 +191,7 @@ object ParkingZone extends LazyLogging { * @return True|False (representing success) wrapped in an effect type */ def releaseStall(parkingZone: ParkingZone): Boolean = - if (parkingZone.parkingZoneId == DefaultParkingZoneId) { + if (parkingZone.parkingZoneId == DefaultParkingZone.parkingZoneId) { // this zone does not exist in memory but it has infinitely many stalls to release true } else if 
(parkingZone.stallsAvailable + 1 > parkingZone.maxStalls) { @@ -189,7 +209,7 @@ object ParkingZone extends LazyLogging { * @return True|False (representing success) wrapped in an effect type */ def claimStall(parkingZone: ParkingZone): Boolean = - if (parkingZone.parkingZoneId == DefaultParkingZoneId) { + if (parkingZone.parkingZoneId == DefaultParkingZone.parkingZoneId) { // this zone does not exist in memory but it has infinitely many stalls to release true } else if (parkingZone.stallsAvailable - 1 >= 0) { @@ -200,24 +220,6 @@ object ParkingZone extends LazyLogging { false } - /** - * Option-wrapped Array index lookup for Array[ParkingZone] - * - * @param parkingZones collection of parking zones - * @param parkingZoneId an array index - * @return Optional ParkingZone - */ - def getParkingZone( - parkingZones: Map[Id[ParkingZoneId], ParkingZone], - parkingZoneId: Id[ParkingZoneId] - ): Option[ParkingZone] = { - val result = parkingZones.get(parkingZoneId) - if (result.isEmpty) { - logger.warn(s"attempting to access parking zone with illegal parkingZoneId $parkingZoneId, will be ignored") - } - result - } - /** * construct ID of a Parking Zone * @param geoId TAZ ID @@ -238,9 +240,7 @@ object ParkingZone extends LazyLogging { val chargingPointType = chargingPointTypeMaybe.getOrElse("NoCharger") val pricingModel = pricingModelMaybe.getOrElse("Free") val costInCents = pricingModelMaybe.map(x => (x.costInDollars * 100).toInt).getOrElse(0) - createId( - s"zone-${reservedFor}-${geoId}-${parkingType}-${chargingPointType}-${pricingModel}-${costInCents}-$numStalls" - ) + createId(s"zone-$reservedFor-$geoId-$parkingType-$chargingPointType-$pricingModel-$costInCents-$numStalls") } /** diff --git a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZoneFileUtils.scala b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZoneFileUtils.scala index 21410858363..7a871d426c9 100644 --- a/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZoneFileUtils.scala 
+++ b/src/main/scala/beam/agentsim/infrastructure/parking/ParkingZoneFileUtils.scala @@ -5,7 +5,6 @@ import beam.agentsim.agents.vehicles.VehicleManager.ReservedFor import beam.agentsim.agents.vehicles.{VehicleCategory, VehicleManager} import beam.agentsim.infrastructure.charging.ChargingPointType import beam.agentsim.infrastructure.parking.ParkingZoneSearch.ZoneSearchTree -import beam.agentsim.infrastructure.power.SitePowerManager import beam.agentsim.infrastructure.taz.TAZ import beam.sim.BeamServices import beam.sim.config.BeamConfig @@ -59,7 +58,7 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { * @param maybeChargingPoint charging point type * @return a row describing infinite free parking at this TAZ */ - def defaultParkingRow( + private def defaultParkingRow( geoId: Id[TAZ], parkingType: ParkingType, maybeChargingPoint: Option[ChargingPointType], @@ -70,7 +69,7 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { parkingType.toString, // parkingType PricingModel.FlatFee(0).toString, // pricingModel maybeChargingPoint.map(_.toString).getOrElse("NoCharger"), // chargingPointType - ParkingZone.UbiqiutousParkingAvailability.toString, // numStalls + ParkingZone.UbiquitousParkingAvailability.toString, // numStalls "0", // feeInCents defaultReservedFor.toString, // reservedFor "", // timeRestrictions @@ -413,7 +412,7 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { } } - // values look like Class456Vocational:00:00-14:00|Car:14:00-18:00|Bike:18:00-24:00 + // values look like Class456Vocational|00:00-14:00;Car|14:00-18:00;Bike|18:00-24:00; Option(timeRestrictionsString) .getOrElse("") .split(';') @@ -487,7 +486,6 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { val newCostInDollarsString = (feeInCents * parkingCostScalingFactor / 100.0).toString val reservedFor = validateReservedFor(reservedForString, beamConfig, defaultReservedFor) // parse this row from the source file - val taz = 
tazString.toUpperCase.createId[TAZ] val parkingType = ParkingType(parkingTypeString) val pricingModel = PricingModel(pricingModelString, newCostInDollarsString) val timeRestrictions = parseTimeRestrictions(timeRestrictionsString) @@ -496,11 +494,18 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { val parkingZoneIdMaybe = if (isBlank(parkingZoneIdString)) Some(ParkingZone.createId(rowNumber.toString)) else Some(ParkingZone.createId(parkingZoneIdString)) - val linkMaybe = !isBlank(locationXString) && !isBlank(locationYString) match { - case true if beamServices.isDefined => - val coord = new Coord(locationXString.toDouble, locationYString.toDouble) + + val coordMaybe: Option[Coord] = for { + xLoc <- Option(locationXString).filterNot(isBlank) + yLoc <- Option(locationYString).filterNot(isBlank) + x <- Try(xLoc.toDouble).toOption + y <- Try(yLoc.toDouble).toOption + } yield new Coord(x, y) + + val linkMaybe = coordMaybe match { + case Some(coord) if beamServices.isDefined => Some(NetworkUtils.getNearestLink(beamServices.get.beamScenario.network, beamServices.get.geo.wgs2Utm(coord))) - case false if beamServices.isDefined && reservedFor.managerType == VehicleManager.TypeEnum.Household => + case None if beamServices.isDefined && reservedFor.managerType == VehicleManager.TypeEnum.Household => getHouseholdLocation(beamServices.get, reservedFor.managerId) map { homeCoord => NetworkUtils.getNearestLink( beamServices.get.beamScenario.network, @@ -509,6 +514,41 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { } case _ => None } + + val geoMap = beamServices.map(_.beamScenario.tazTreeMap) + + val tazMaybe: Option[Id[TAZ]] = (Option(tazString), geoMap) match { + case (Some(tazId), _) => + Some(tazId.toUpperCase.createId[TAZ]) + case (None, Some(tazTreeMap)) => + // Try to get TAZ from link + val tazFromLink = for { + link <- linkMaybe + taz <- tazTreeMap.getTAZfromLink(link.getId) + } yield taz.tazId + + // If that fails, try getting TAZ from 
coordinates + tazFromLink.orElse { + for { + bs <- beamServices + coord <- coordMaybe + utmCoord = bs.geo.wgs2Utm(coord) + taz = tazTreeMap.getTAZ(utmCoord) + } yield taz.tazId + } + case _ => None + } + + // Handle the taz result separately, with meaningful error messages if needed + val taz = tazMaybe.getOrElse { + if (geoMap.isEmpty) + throw new IllegalArgumentException("Missing tazTreeMap: cannot determine TAZ for parking zone") + else if (linkMaybe.isEmpty && coordMaybe.isEmpty) + throw new IllegalArgumentException("Missing location data: cannot determine TAZ for parking zone") + else + throw new IllegalArgumentException("Failed to determine TAZ for parking zone") + } + val sitePowerManagerMaybe = if (isBlank(sitePowerManagerString)) None else Some(sitePowerManagerString) val energyStorageCapacityMaybe = if (isBlank(energyStorageCapacityString)) None else Some(energyStorageCapacityString.toDouble) @@ -568,14 +608,34 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { } private def validateCsvRow(csvRow: jMap): Boolean = { - val allRequiredPresented = Seq("taz", "parkingType", "pricingModel", "chargingPointType", "numStalls", "feeInCents") + // Check required fields are present and non-empty + val allRequiredPresented = Seq("parkingType", "pricingModel", "chargingPointType", "numStalls", "feeInCents") .forall(key => { val value = csvRow.get(key) value != null && value.nonEmpty }) - allRequiredPresented && - Try(csvRow.get("numStalls").toDouble).toOption.exists(_ >= 0) && - Try(csvRow.get("feeInCents").toDouble).toOption.exists(_ >= 0) + + // Check that either TAZ or both location coordinates are provided + val hasTaz = Option(csvRow.get("taz")).exists(_.nonEmpty) + val hasLocationX = Option(csvRow.get("locationX")).exists(_.nonEmpty) + val hasLocationY = Option(csvRow.get("locationY")).exists(_.nonEmpty) + val hasCoordinates = hasLocationX && hasLocationY + + // Validate that at least one location identifier is present + val hasLocationIdentifier = 
hasTaz || hasCoordinates + + // Validate numeric fields + val validNumericFields = + Try(csvRow.get("numStalls").toDouble).toOption.exists(_ >= 0) && + Try(csvRow.get("feeInCents").toDouble).toOption.exists(_ >= 0) + + // Coordinates must be valid numbers if provided + val validCoordinates = (!hasLocationX && !hasLocationY) || + (hasCoordinates && + Try(csvRow.get("locationX").toDouble).isSuccess && + Try(csvRow.get("locationY").toDouble).isSuccess) + + allRequiredPresented && hasLocationIdentifier && validNumericFields && validCoordinates } /** @@ -720,17 +780,17 @@ object ParkingZoneFileUtils extends ExponentialLazyLogging { def rideHailParkingOutputDataDescriptor: OutputDataDescriptor = OutputDataDescriptorObject("ParkingZoneFileUtils", s"ridehailParking.csv")( """ - taz | Taz id where the parking zone resides - parkingType | Parking type: Residential, Workplace, Public - pricingModel | Pricing model - chargingPointType | Charging point type - numStalls | Number of stalls - feeInCents | Fee in cents - reservedFor | Id of Vehicle Manager this zone is reserver for - timeRestrictions | Time restrictions for vehicle categories - parkingZoneId | Parking zone id - locationX | X part of a concrete location of this parking zone (if defined) - locationY | Y part of a concrete location of this parking zone (if defined) + taz | Taz id where the parking zone resides + parkingType | Parking type: Residential, Workplace, Public + pricingModel | Pricing model + chargingPointType | Charging point type + numStalls | Number of stalls + feeInCents | Fee in cents + reservedFor | Id of Vehicle Manager this zone is reserver for + timeRestrictions | Time restrictions for vehicle categories + parkingZoneId | Parking zone id + locationX | X part of a concrete location of this parking zone (if defined) + locationY | Y part of a concrete location of this parking zone (if defined) sitePowerManager | Site power manager energyStorageCapacityInKWh | Energy storage capacity in KWh 
energyStorageSOC | Energy storage state of charge diff --git a/src/main/scala/beam/analysis/RideHailFleetAnalysis.scala b/src/main/scala/beam/analysis/RideHailFleetAnalysis.scala index 76de8c353ec..d7bed223c4c 100644 --- a/src/main/scala/beam/analysis/RideHailFleetAnalysis.scala +++ b/src/main/scala/beam/analysis/RideHailFleetAnalysis.scala @@ -109,28 +109,28 @@ class RideHailFleetAnalysisInternal( if (ev && isCAV) { collectEvent( rideHailEvCav, - pathTraversalEvent.copy(time = pathTraversalEvent.departureTime.toDouble - 0.5), + pathTraversalEvent.copy(time = pathTraversalEvent.departureTime - 0.5f), vehicle, pathTraversalEvent.time ) } else if (ev && !isCAV) { collectEvent( ridehailEvNonCav, - pathTraversalEvent.copy(time = pathTraversalEvent.departureTime.toDouble - 0.5), + pathTraversalEvent.copy(time = pathTraversalEvent.departureTime - 0.5f), vehicle, pathTraversalEvent.time ) } else if (!ev && isCAV) { collectEvent( rideHailNonEvCav, - pathTraversalEvent.copy(time = pathTraversalEvent.departureTime.toDouble - 0.5), + pathTraversalEvent.copy(time = pathTraversalEvent.departureTime - 0.5f), vehicle, pathTraversalEvent.time ) } else if (!ev && !isCAV) { collectEvent( rideHailNonEvNonCav, - pathTraversalEvent.copy(time = pathTraversalEvent.departureTime.toDouble - 0.5), + pathTraversalEvent.copy(time = pathTraversalEvent.departureTime - 0.5f), vehicle, pathTraversalEvent.time ) diff --git a/src/main/scala/beam/replanning/ReplanningUtil.scala b/src/main/scala/beam/replanning/ReplanningUtil.scala index 893461aaeac..df3726b3433 100644 --- a/src/main/scala/beam/replanning/ReplanningUtil.scala +++ b/src/main/scala/beam/replanning/ReplanningUtil.scala @@ -2,6 +2,7 @@ package beam.replanning import beam.router.model.EmbodiedBeamTrip import beam.utils.DebugLib +import com.typesafe.scalalogging.LazyLogging import org.matsim.api.core.v01.population._ import org.matsim.core.config.groups.PlanCalcScoreConfigGroup import org.matsim.core.population.PopulationUtils @@ -9,7 
+10,7 @@ import org.matsim.core.replanning.selectors.RandomPlanSelector import scala.collection.JavaConverters._ -object ReplanningUtil { +object ReplanningUtil extends LazyLogging { def makeExperiencedMobSimCompatible[T <: Plan, I](person: HasPlansAndId[T, I]): Unit = { val experiencedPlan = person.getSelectedPlan.getCustomAttributes @@ -19,14 +20,21 @@ object ReplanningUtil { if (experiencedPlan != null && experiencedPlan.getPlanElements.size() > 0) { // keep track of the vehicles that been used during previous simulation for (i <- 0 until (experiencedPlan.getPlanElements.size() - 1)) { - experiencedPlan.getPlanElements.get(i) match { - case leg: Leg => - // Make sure it is not `null` - Option(x = person.getSelectedPlan.getPlanElements.get(i).getAttributes.getAttribute("vehicles")).foreach { - attibValue => - leg.getAttributes.putAttribute("vehicles", attibValue) - } - case _ => + if (i >= person.getSelectedPlan.getPlanElements.size() || i >= experiencedPlan.getPlanElements.size()) { + logger.error(s"Skipping index $i: experiencedPlan has more elements (${experiencedPlan.getPlanElements + .size()}) than selectedPlan (${person.getSelectedPlan.getPlanElements.size()}) for person ${person.getId}") + // Skip this iteration instead of exiting the function + // Use "return" only if you want to stop processing the entire function + } else { + experiencedPlan.getPlanElements.get(i) match { + case leg: Leg => + // Make sure it is not `null` + Option(x = person.getSelectedPlan.getPlanElements.get(i).getAttributes.getAttribute("vehicles")).foreach { + attibValue => + leg.getAttributes.putAttribute("vehicles", attibValue) + } + case _ => + } } } // BeamMobsim needs activities with coords diff --git a/src/main/scala/beam/router/BeamTravelTime.scala b/src/main/scala/beam/router/BeamTravelTime.scala new file mode 100644 index 00000000000..735a4fe9f37 --- /dev/null +++ b/src/main/scala/beam/router/BeamTravelTime.scala @@ -0,0 +1,71 @@ +package beam.router + +import 
beam.utils.NetworkHelper +import org.matsim.api.core.v01.network.Link +import org.matsim.api.core.v01.population.Person +import org.matsim.core.router.util.TravelTime +import org.matsim.vehicles.Vehicle + +/** + * Extension of MATSim's TravelTime interface that adds methods for more efficient + * travel time lookups using integer link IDs directly. + */ +trait BeamTravelTime extends TravelTime { + + /** + * Get travel time using integer link ID directly. + * This avoids the overhead of Link object lookups and string parsing. + */ + def getLinkTravelTime(linkId: Int, time: Double): Double + + /** + * Optional method that can also accept pre-computed link length for further optimization. + */ + def getLinkTravelTime(linkId: Int, time: Double, linkLengthMeters: Double): Double = + getLinkTravelTime(linkId, time) +} + +/** + * Free flow travel time implementation optimized for direct integer ID access. + */ +class BeamFreeFlowTravelTime(networkHelper: NetworkHelper) extends BeamTravelTime { + + // Cache link lengths for faster access + private val linkLengths: Array[Double] = { + val maxLinkId = networkHelper.allLinks.map(link => Integer.parseInt(link.getId.toString)).max + + val lengths = new Array[Double](maxLinkId + 1) + networkHelper.allLinks.foreach { link => + val id = Integer.parseInt(link.getId.toString) + lengths(id) = link.getLength + } + lengths + } + + // Cache link free speeds for faster access + private val linkFreeSpeeds: Array[Double] = { + val maxLinkId = networkHelper.allLinks.map(link => Integer.parseInt(link.getId.toString)).max + + val speeds = new Array[Double](maxLinkId + 1) + networkHelper.allLinks.foreach { link => + val id = Integer.parseInt(link.getId.toString) + speeds(id) = link.getFreespeed + } + speeds + } + + // Original MATSim interface method + override def getLinkTravelTime(link: Link, time: Double, person: Person, vehicle: Vehicle): Double = { + link.getLength / link.getFreespeed + } + + // Optimized method using integer ID + 
override def getLinkTravelTime(linkId: Int, time: Double): Double = { + linkLengths(linkId) / linkFreeSpeeds(linkId) + } + + // Further optimized method with pre-computed length + override def getLinkTravelTime(linkId: Int, time: Double, linkLengthMeters: Double): Double = { + linkLengthMeters / linkFreeSpeeds(linkId) + } +} diff --git a/src/main/scala/beam/router/LinkTravelTimeContainer.scala b/src/main/scala/beam/router/LinkTravelTimeContainer.scala index caf58b1131f..9bf70685d28 100755 --- a/src/main/scala/beam/router/LinkTravelTimeContainer.scala +++ b/src/main/scala/beam/router/LinkTravelTimeContainer.scala @@ -14,13 +14,13 @@ import scala.collection.mutable import scala.util.Try class LinkTravelTimeContainer(fileName: String, timeBinSizeInSeconds: Int, maxHour: Int) - extends TravelTime + extends BeamTravelTime with LazyLogging { - private val travelTimeCalculator: TravelTime = + private val travelTimeCalculator: BeamTravelTime = TravelTimeCalculatorHelper.CreateTravelTimeCalculator(timeBinSizeInSeconds, loadLinkStats().asJava) - def loadLinkStats(): scala.collection.Map[String, Array[Double]] = { + private def loadLinkStats(): scala.collection.Map[String, Array[Double]] = { val start = System.currentTimeMillis() val linkTravelTimeMap: mutable.HashMap[String, Array[Double]] = mutable.HashMap() logger.info(s"Stats fileName [$fileName] is being loaded") @@ -58,4 +58,11 @@ class LinkTravelTimeContainer(fileName: String, timeBinSizeInSeconds: Int, maxHo travelTimeCalculator.getLinkTravelTime(link, time, person, vehicle) } + /** + * Get travel time using integer link ID directly. + * This avoids the overhead of Link object lookups and string parsing. 
+ */ + def getLinkTravelTime(linkId: Int, time: Double): Double = { + travelTimeCalculator.getLinkTravelTime(linkId, time) + } } diff --git a/src/main/scala/beam/router/Modes.scala b/src/main/scala/beam/router/Modes.scala index 439ebb7dce6..084b0d0b0d0 100644 --- a/src/main/scala/beam/router/Modes.scala +++ b/src/main/scala/beam/router/Modes.scala @@ -379,9 +379,10 @@ object TourModes { ) ) case _ => - val retainedVehicle = availableVehicles - .find(v => currentTourPersonalVehicle.find(availableVehicles.map(_.id).contains).contains(v.id)) - .map(_.vehicle) +// val retainedVehicle = availableVehicles +// .find(v => currentTourPersonalVehicle.find(availableVehicles.map(_.id).contains).contains(v.id)) +// .map(_.vehicle) + val retainedVehicle = None // TEMP: Trying out not retaining parent tour vehicles on subtours outcome .getOrElseUpdate(Some(WALK_BASED), mutable.Map.empty[EmbodiedBeamTrip, Option[BeamVehicle]]) diff --git a/src/main/scala/beam/router/RoutingWorker.scala b/src/main/scala/beam/router/RoutingWorker.scala index d2db72ed2ba..1f8cdbcc3f3 100755 --- a/src/main/scala/beam/router/RoutingWorker.scala +++ b/src/main/scala/beam/router/RoutingWorker.scala @@ -83,17 +83,20 @@ class RoutingWorker(workerParams: R5Parameters, networks2: Option[(TransportNetw private var r5: R5Wrapper = new R5Wrapper( workerParams, - new FreeFlowTravelTime, + new BeamFreeFlowTravelTime(networkHelper = workerParams.networkHelper), workerParams.beamConfig.beam.routing.r5.travelTimeNoiseFraction ) private var secondR5: Option[R5Wrapper] = for { (transportNetwork, network) <- networks2 - } yield new R5Wrapper( - workerParams.copy(transportNetwork = transportNetwork, networkHelper = new NetworkHelperImpl(network)), - new FreeFlowTravelTime, - workerParams.beamConfig.beam.routing.r5.travelTimeNoiseFraction - ) + } yield { + val networkHelperImpl = new NetworkHelperImpl(network) + new R5Wrapper( + workerParams.copy(transportNetwork = transportNetwork, networkHelper = networkHelperImpl), + 
new BeamFreeFlowTravelTime(networkHelperImpl), + workerParams.beamConfig.beam.routing.r5.travelTimeNoiseFraction + ) + } private val graphHopperDir: String = Paths.get(workerParams.beamConfig.beam.inputDirectory, "graphhopper").toString private val carGraphHopperDir: String = Paths.get(graphHopperDir, "car").toString diff --git a/src/main/scala/beam/router/r5/BikeLanesAdjustment.scala b/src/main/scala/beam/router/r5/BikeLanesAdjustment.scala index bc754a56d43..c336631031a 100644 --- a/src/main/scala/beam/router/r5/BikeLanesAdjustment.scala +++ b/src/main/scala/beam/router/r5/BikeLanesAdjustment.scala @@ -26,6 +26,10 @@ class BikeLanesAdjustment @Inject() (bikeLanesData: BikeLanesData) { } } + def bikeScaleFactor(linkId: LinkId): Double = { + scaleFactor(linkId) + } + def scaleFactor(vehicleType: BeamVehicleType, linkId: LinkId): Double = { if (vehicleType.vehicleCategory == VehicleCategory.Bike) { scaleFactor(linkId) diff --git a/src/main/scala/beam/router/r5/CarWeightCalculator.scala b/src/main/scala/beam/router/r5/CarWeightCalculator.scala index 709adae481b..5c6a43f1b8d 100644 --- a/src/main/scala/beam/router/r5/CarWeightCalculator.scala +++ b/src/main/scala/beam/router/r5/CarWeightCalculator.scala @@ -1,11 +1,9 @@ package beam.router.r5 -import beam.agentsim.agents.vehicles.BeamVehicleType +import beam.router.BeamTravelTime import org.matsim.core.router.util.TravelTime import java.util.concurrent.ThreadLocalRandom -import java.util.concurrent.atomic.AtomicInteger -import scala.util.Try class CarWeightCalculator(workerParams: R5Parameters, travelTimeNoiseFraction: Double = 0d) { private val networkHelper = workerParams.networkHelper @@ -14,49 +12,60 @@ class CarWeightCalculator(workerParams: R5Parameters, travelTimeNoiseFraction: D val maxFreeSpeed: Double = networkHelper.allLinks.map(_.getFreespeed).max / 0.621371 // Convert kph to mph private val minSpeed = workerParams.beamConfig.beam.physsim.minCarSpeedInMetersPerSecond - private val noiseIdx: AtomicInteger = 
new AtomicInteger(0) - - private val travelTimeNoises: Array[Double] = if (travelTimeNoiseFraction.equals(0d)) { - Array.empty - } else { - Array.fill(1000000) { - ThreadLocalRandom.current().nextDouble(1 - travelTimeNoiseFraction, 1 + travelTimeNoiseFraction) - } - } + // Pre-compute noise bounds for faster generation + private val noiseLowerBound = 1 - travelTimeNoiseFraction + private val noiseUpperBound = 1 + travelTimeNoiseFraction def calcTravelTime(linkId: Int, travelTime: TravelTime, time: Double): Double = { - calcTravelTime(linkId, travelTime, None, time, shouldAddNoise = false) + calcTravelTime(linkId, travelTime, maxFreeSpeed, time, shouldAddNoise = false) } def calcTravelTime( linkId: Int, travelTime: TravelTime, - vehicleType: Option[BeamVehicleType], + maxSpeed: Double, time: Double, - shouldAddNoise: Boolean + shouldAddNoise: Boolean, + edgeLength: Double = -1 // Allow passing pre-computed edge length ): Double = { val link = networkHelper.getLinkUnsafe(linkId) assert(link != null) - val edge = transportNetwork.streetLayer.edgeStore.getCursor(linkId) - val maxTravelTime = edge.getLengthM / minSpeed - val maxSpeed: Double = vehicleType match { - case Some(vType) => vType.maxVelocity.getOrElse(maxFreeSpeed) - case None => maxFreeSpeed - } + // Use provided edge length if available, otherwise look it up + val lengthM = + if (edgeLength > 0) edgeLength + else { + transportNetwork.streetLayer.edgeStore.lengths_mm.get(linkId / 2) / 1000.0 + } - val minTravelTime = edge.getLengthM / maxSpeed + // Pre-compute these values once + val maxTravelTime = lengthM / minSpeed + val minTravelTime = lengthM / maxSpeed - val physSimTravelTime = travelTime.getLinkTravelTime(link, time, null, null) + // Get travel time - use optimized method if available + val physSimTravelTime = travelTime match { + case beamTT: BeamTravelTime => + // Use the optimized method with pre-computed length + beamTT.getLinkTravelTime(linkId, time, lengthM) + case _ => + // Fall back to the 
original method + val link = networkHelper.getLinkUnsafe(linkId) + if (link == null) { + lengthM / maxSpeed // Default to free flow if link not found + } else { + travelTime.getLinkTravelTime(link, time, null, null) + } + } + + // Generate noise only if needed val physSimTravelTimeWithNoise = - if (travelTimeNoiseFraction.equals(0d) || !shouldAddNoise) { - physSimTravelTime + if (travelTimeNoiseFraction > 0d && shouldAddNoise) { + // Generate a value between 0 and 1, scale it to the noise range, then shift it + physSimTravelTime * ThreadLocalRandom.current().nextDouble(noiseLowerBound, noiseUpperBound) } else { - val idx = Math.abs(noiseIdx.getAndIncrement() % travelTimeNoises.length) - physSimTravelTime * travelTimeNoises(idx) + physSimTravelTime } - val linkTravelTime = Math.max(physSimTravelTimeWithNoise, minTravelTime) - val result = Math.min(linkTravelTime, maxTravelTime) - result + // Use Math.min/max for cleaner clamping + Math.min(Math.max(physSimTravelTimeWithNoise, minTravelTime), maxTravelTime) } } diff --git a/src/main/scala/beam/router/r5/R5Wrapper.scala b/src/main/scala/beam/router/r5/R5Wrapper.scala index cc27bfac222..5e7549647ad 100644 --- a/src/main/scala/beam/router/r5/R5Wrapper.scala +++ b/src/main/scala/beam/router/r5/R5Wrapper.scala @@ -34,6 +34,7 @@ import org.matsim.vehicles.Vehicle import java.time.ZonedDateTime import java.time.temporal.ChronoUnit import java.util +import java.util.concurrent.ConcurrentHashMap import java.util.function.IntFunction import java.util.{Collections, Optional} import scala.collection.JavaConverters._ @@ -83,6 +84,19 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo ) }.toMap + private lazy val precomputedRestrictions: Map[RoutingVehicleCategory, Map[Long, Boolean]] = { + val categories = RoutingVehicleCategory.values + categories.map { category => + val categoryRestrictions = osmIdToRoadRestriction.map { case (osmId, restrictions) => + osmId -> restrictions.isRestricted( + 
category, + Double.MaxValue + ) + } + category -> categoryRestrictions + }.toMap + } + private val linkRadiusMeters: Double = beamConfig.beam.routing.r5.linkRadiusMeters @@ -493,6 +507,7 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo profileRequest.fromLat = from.getY profileRequest.toLon = to.getX profileRequest.toLat = to.getY + val walkToVehicleDuration = maybeWalkToVehicle(vehicle).map(leg => leg.beamLeg.duration).getOrElse(0) profileRequest.fromTime = request.departureTime + walkToVehicleDuration profileRequest.toTime = @@ -620,10 +635,16 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo val egressRouters = mutable.Map[LegMode, StreetRouter]() val egressStopsByMode = mutable.Map[LegMode, StopVisitor]() profileRequest.reverseSearch = true + val isCarEgress = egressVehicles.exists(_.mode == CAR) for (vehicle <- egressVehicles) { val (costPerMile, costPerMinute) = getVehicleCosts(vehicle) val theDestination = if (mainRouteToVehicle) { - destinationVehicle.get.locationUTM.loc + if (destinationVehicle.isDefined) { + destinationVehicle.get.locationUTM.loc + } else { + logger.error("Route requested with egress vehicles that don't exist") + request.destinationUTM + } } else { request.destinationUTM } @@ -678,10 +699,20 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo egressStopsByMode.put(legMode, stopVisitor) } } + if (isCarEgress) { + profileRequest.maxRides = 2 + profileRequest.suboptimalMinutes = beamConfig.beam.routing.r5.suboptimalMinutesForDriveAccess + } else { + profileRequest.maxRides = 3 + profileRequest.suboptimalMinutes = beamConfig.beam.routing.r5.suboptimalMinutes + } val departureTimeToDominatingList: IntFunction[DominatingList] = (departureTime: Int) => beamConfig.beam.routing.r5.transitAlternativeList.toLowerCase match { - case "suboptimal" if !mainRouteRideHailTransit => + case "suboptimal" if !mainRouteRideHailTransit && !isCarEgress => + // 
Note: We now disallow multiple responses for + // drive_transit. We should turn this back on if it is + // very important to the analysis new SuboptimalDominatingList( profileRequest.suboptimalMinutes ) @@ -693,9 +724,6 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo ) } - val transitPaths = latency("getpath-transit-time", Metrics.VerboseLevel) { - profileRequest.fromTime = request.departureTime - accessStopsByMode.flatMap { case (mode, stopVisitor) => val modeSpecificBuffer = mode match { case LegMode.WALK => beamConfig.beam.routing.r5.accessBufferTimeSeconds.walk case LegMode.BICYCLE => beamConfig.beam.routing.r5.accessBufferTimeSeconds.bike @@ -704,6 +732,7 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo case LegMode.CAR => beamConfig.beam.routing.r5.accessBufferTimeSeconds.car case _ => 0 } + profileRequest.fromTime = request.departureTime profileRequest.toTime = request.departureTime + modeSpecificBuffer + 61 // Important to allow 61 seconds for transit schedules to be considered! 
Along with any other buffers val router = new McRaptorSuboptimalPathProfileRouter( @@ -1270,7 +1299,7 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo streetMode: StreetMode, req: ProfileRequest ): Float = { - ttc(startTime + durationSeconds, edge.getEdgeIndex, streetMode).floatValue().ceil + math.ceil(ttc(startTime + durationSeconds, edge.getEdgeIndex, streetMode).toFloat).toFloat } } } @@ -1281,15 +1310,28 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo shouldApplyBicycleScaleFactor: Boolean = false ): TravelTimeByLinkCalculator = { val profileRequest = createProfileRequest + + // Cache the maximum velocity for this vehicle type + val vehicleMaxSpeed = vehicleType.maxVelocity.getOrElse(Double.MaxValue) + (time: Double, linkId: Int, streetMode: StreetMode) => { - val edge = transportNetwork.streetLayer.edgeStore.getCursor(linkId) - val maxSpeed: Double = vehicleType.maxVelocity.getOrElse(profileRequest.getSpeedForMode(streetMode)) - val minTravelTime = edge.getLengthM / maxSpeed - if (streetMode == StreetMode.CAR) { - carWeightCalculator.calcTravelTime(linkId, travelTime, Some(vehicleType), time, shouldAddNoise) - } else if (streetMode == StreetMode.BICYCLE && shouldApplyBicycleScaleFactor) { - val scaleFactor = bikeLanesAdjustment.scaleFactor(vehicleType, linkId) - minTravelTime * scaleFactor + // Get the edge length, using the cache + val edgeLength = transportNetwork.streetLayer.edgeStore.lengths_mm.get(linkId / 2) / 1000.0 + + // Calculate the mode-specific speed + val maxSpeed: Double = if (streetMode == StreetMode.CAR) { + Math.min(vehicleMaxSpeed, profileRequest.getSpeedForMode(streetMode)) + } else { + profileRequest.getSpeedForMode(streetMode) + } + + val minTravelTime = edgeLength / maxSpeed + + if (streetMode == StreetMode.BICYCLE && shouldApplyBicycleScaleFactor) { + //note we're not explicitly checking that it is a Bike VehicleType + minTravelTime * 
bikeLanesAdjustment.bikeScaleFactor(linkId) + } else if (streetMode == StreetMode.CAR) { + carWeightCalculator.calcTravelTime(linkId, travelTime, maxSpeed, time, shouldAddNoise, edgeLength) } else { minTravelTime } @@ -1309,19 +1351,20 @@ class R5Wrapper(workerParams: R5Parameters, travelTime: TravelTime, travelTimeNo perMinuteCost: Double = 0.0 ): TravelCostCalculator = { (edge: EdgeStore#Edge, legDurationSeconds: Int, traversalTimeSeconds: Float) => { + val osmId = edge.getOSMID + val category = RoutingVehicleCategory.fromCategory(vehicleType.vehicleCategory) val roadRestrictionWeightMultiplier: Float = - if ( - osmIdToRoadRestriction - .get(edge.getOSMID) - .exists( - _.isRestricted( - vehicleType.vehicleCategory, - vehicleType.restrictRoadsByFreeSpeedInMeterPerSecond.getOrElse(Double.MaxValue) - ) - ) - ) + if (precomputedRestrictions.getOrElse(category, Map.empty).getOrElse(osmId, false)) { workerParams.beamConfig.beam.agentsim.agents.vehicles.roadRestrictionWeightMultiplier.toFloat - else 1f + } else { + vehicleType.restrictRoadsByFreeSpeedInMeterPerSecond.map(maxSpeed => + osmIdToRoadRestriction.get(osmId).exists(_.isRestricted(vehicleType.vehicleCategory, maxSpeed)) + ) match { + case Some(true) => + workerParams.beamConfig.beam.agentsim.agents.vehicles.roadRestrictionWeightMultiplier.toFloat + case _ => 1f + } + } val fare: Double = traversalTimeSeconds / 60.0 * perMinuteCost + edge.getLengthM / METERS_IN_MILE * perMileCost @@ -1345,16 +1388,34 @@ object R5Wrapper { private val HeavyHeavyDutyTruckTag = "hgv" private val LightAndMediumHeavyDutyTruckTag = "mdv" + sealed trait RoutingVehicleCategory + + private object RoutingVehicleCategory { + case object HeavyDuty extends RoutingVehicleCategory + case object MediumDuty extends RoutingVehicleCategory + case object Other extends RoutingVehicleCategory + + val values: Set[RoutingVehicleCategory] = Set(HeavyDuty, MediumDuty, Other) + + def fromCategory(category: VehicleCategory.VehicleCategory): 
RoutingVehicleCategory = category match { + case VehicleCategory.Class78Tractor | VehicleCategory.Class78Vocational => HeavyDuty + case VehicleCategory.Class456Vocational | VehicleCategory.Class2b3Vocational => MediumDuty + case _ => Other + } + } + private case class RoadRestrictions(hhdt: Boolean, lmhdt: Boolean, freeSpeed: Double) { - def isRestricted(category: VehicleCategory.VehicleCategory, speedThreshold: Double): Boolean = { + def isRestricted(category: RoutingVehicleCategory, speedThreshold: Double): Boolean = { category match { - case VehicleCategory.Class78Tractor => !hhdt - case VehicleCategory.Class78Vocational => !hhdt - case VehicleCategory.Class456Vocational => !lmhdt - case VehicleCategory.Class2b3Vocational => !lmhdt - case _ => freeSpeed > speedThreshold + case RoutingVehicleCategory.HeavyDuty => !hhdt + case RoutingVehicleCategory.MediumDuty => !lmhdt + case RoutingVehicleCategory.Other => freeSpeed > speedThreshold } } + + def isRestricted(category: VehicleCategory.VehicleCategory, speedThreshold: Double): Boolean = { + isRestricted(RoutingVehicleCategory.fromCategory(category), speedThreshold) + } } } diff --git a/src/main/scala/beam/router/skim/ActivitySimPathType.scala b/src/main/scala/beam/router/skim/ActivitySimPathType.scala index 1d17ca5ed3b..43fcee74eae 100644 --- a/src/main/scala/beam/router/skim/ActivitySimPathType.scala +++ b/src/main/scala/beam/router/skim/ActivitySimPathType.scala @@ -2,9 +2,7 @@ package beam.router.skim import beam.agentsim.agents.ridehail.RideHailVehicleId import beam.router.Modes.BeamMode -import beam.router.Modes.BeamMode._ import beam.router.model.{EmbodiedBeamLeg, EmbodiedBeamTrip} -import beam.router.skim.ActivitySimMetric._ import org.matsim.api.core.v01.population.Activity sealed trait ActivitySimPathType @@ -48,7 +46,9 @@ object ActivitySimPathType { val (_, longestCarLegId) = tryGetLongestLegId(trip, isCar) val (longestWalkTransitLeg, longestWalkTransitLegId) = tryGetLongestLegId(trip, isTransit) - if 
(longestCarLegId.isEmpty || longestWalkTransitLeg.isEmpty || longestWalkTransitLegId.isEmpty) { + if (trip.legs.exists(_.isRideHail)) { + OTHER // Stub for when we merge in RH_TRANSIT mode from the Cruise branch + } else if (longestCarLegId.isEmpty || longestWalkTransitLeg.isEmpty || longestWalkTransitLegId.isEmpty) { OTHER } else if (longestCarLegId.get > longestWalkTransitLegId.get) { longestWalkTransitLeg.map(leg => leg.beamLeg.mode) match { diff --git a/src/main/scala/beam/router/skim/ActivitySimSkimmer.scala b/src/main/scala/beam/router/skim/ActivitySimSkimmer.scala index 7135752d417..a291faf0bd3 100644 --- a/src/main/scala/beam/router/skim/ActivitySimSkimmer.scala +++ b/src/main/scala/beam/router/skim/ActivitySimSkimmer.scala @@ -2,7 +2,7 @@ package beam.router.skim import beam.router.skim.ActivitySimPathType.{isWalkTransit, TNC_SHARED, TNC_SINGLE, WLK_TRN_WLK} import beam.router.skim.core.{AbstractSkimmer, AbstractSkimmerInternal, AbstractSkimmerKey, AbstractSkimmerReadOnly} -import beam.router.skim.urbansim.ActivitySimOmxWriter +import beam.router.skim.urbansim.{ActivitySimOmxWriter, ActivitySimZarrWriter} import beam.router.Modes.BeamMode import beam.router.Modes.BeamMode.{RIDE_HAIL, RIDE_HAIL_POOLED} import beam.sim.BeamScenario @@ -45,7 +45,11 @@ class ActivitySimSkimmer @Inject() (matsimServices: MatsimServices, beamScenario override def writeToDisk(event: IterationEndsEvent): Unit = if (config.writeSkimsInterval > 0 && event.getIteration % config.writeSkimsInterval == 0) { - val extension = if (config.activity_sim_skimmer.fileOutputFormat.equalsIgnoreCase("csv")) "csv.gz" else "omx" + val extension = config.activity_sim_skimmer.fileOutputFormat.toLowerCase match { + case "csv" => "csv.gz" + case "zarr" => "zarr" + case _ => "omx" + } val filePath = event.getServices.getControlerIO .getIterationFilename(event.getServices.getIterationNumber, s"${skimFileBaseName}_current.$extension") writePresentedSkims(filePath) @@ -284,8 +288,12 @@ class 
ActivitySimSkimmer @Inject() (matsimServices: MatsimServices, beamScenario case "csv" => val csvWriter = new CsvWriter(filePath, ExcerptData.csvHeaderSeq) csvWriter.writeAllAndClose(data.map(_.toCsvSeq)) - case _ => + case "zarr" => + ActivitySimZarrWriter.writeToZarr(filePath, data.iterator, geoUnits) + case "omx" => ActivitySimOmxWriter.writeToOmx(filePath, data.iterator, geoUnits) + case _ => + logger.warn("Not writing skims") } } catch { case exception: Exception => diff --git a/src/main/scala/beam/router/skim/urbansim/ActivitySimOmxWriter.scala b/src/main/scala/beam/router/skim/urbansim/ActivitySimOmxWriter.scala index 639cce67874..b8859ae0007 100644 --- a/src/main/scala/beam/router/skim/urbansim/ActivitySimOmxWriter.scala +++ b/src/main/scala/beam/router/skim/urbansim/ActivitySimOmxWriter.scala @@ -5,25 +5,26 @@ import beam.router.skim.ActivitySimPathType._ import beam.router.skim.ActivitySimSkimmer.ExcerptData import beam.router.skim.ActivitySimTimeBin._ import beam.router.skim.{ActivitySimMetric, ActivitySimPathType, ActivitySimTimeBin} -import beam.utils.FileUtils import beam.utils.csv.CsvWriter +import com.typesafe.scalalogging.LazyLogging import omx.hdf5.HDF5Loader import omx.{OmxFile, OmxMatrix} -import scala.collection.mutable -import scala.util.Try - /** * @author Dmitry Openkov */ -object ActivitySimOmxWriter { +object ActivitySimOmxWriter extends LazyLogging { def writeToOmx( filePath: String, skimData: Iterator[ExcerptData], geoUnits: Seq[String] - ): Try[Unit] = Try { + ): Unit = try { + logger.info(s"Starting writeToOmx with filePath: $filePath") + logger.info(s"HDF5 library preparation starting...") HDF5Loader.prepareHdf5Library() + logger.info(s"HDF5 library prepared successfully") + val pathTypeToMatrixData: Map[ActivitySimPathType, MatrixData] = ( for { data <- activitySimMatrixData @@ -31,38 +32,93 @@ object ActivitySimOmxWriter { limitedData = data.copy(metrics = data.metrics & ExcerptData.supportedActivitySimMetric) } yield pathType -> 
limitedData ).toMap - FileUtils.using( - new OmxFile(filePath) - ) { omxFile => - val shape: Array[Int] = Array.fill(geoUnits.size)(geoUnits.size) + logger.info(s"Matrix data map created with ${pathTypeToMatrixData.size} entries") + + logger.info(s"Creating new OmxFile instance for path: $filePath") + val omxFile = new OmxFile(filePath) + logger.info("OmxFile instance created successfully") + + logger.info(s"Shape size will be: ${geoUnits.size}x${geoUnits.size}") + + val shape: Array[Int] = Array.fill(geoUnits.size)(geoUnits.size) + logger.info("Attempting to open new file...") + try { omxFile.openNew(shape) - val geoUnitMapping = geoUnits.zipWithIndex.toMap + logger.info("File opened successfully") + } catch { + case e: Exception => + logger.error(s"Failed to open file: ${e.getMessage}") + logger.error(s"Exception class: ${e.getClass.getName}") + e.printStackTrace() + } + + val geoUnitMapping = geoUnits.zipWithIndex.toMap + + // Group the data by matrix key to process each matrix once + val groupedData = skimData.toSeq.groupBy { excerptData => + val pathType = excerptData.pathType match { + case rideHailMode @ (TNC_SINGLE | TNC_SHARED) => + f"${rideHailMode.toString}_${excerptData.fleetName.toUpperCase}" + case _ => excerptData.pathType.toString + } + ( + pathType, + excerptData.timePeriodString, + pathTypeToMatrixData.get(excerptData.pathType).map(_.metrics).getOrElse(Set.empty[ActivitySimMetric]) + ) + } - val allMatrices = mutable.Map.empty[String, OmxMatrix.OmxFloatMatrix] + // Process each matrix + for { + ((pathType, timePeriod, metrics), excerpts) <- groupedData + metric <- metrics + } { + val matrixName = s"${pathType}_${metric}__$timePeriod" + val valuesFloat = Array.fill[Float](shape(0), shape(1))(Float.NaN) + val matrix = new OmxMatrix.OmxFloatMatrix(matrixName, valuesFloat, -1.0f) + + matrix.setAttribute("mode", pathType) + matrix.setAttribute("timePeriod", timePeriod) + matrix.setAttribute("measure", metric.toString) + + // Fill the matrix for { - 
excerptData <- skimData - matrixData <- pathTypeToMatrixData.get(excerptData.pathType).toIterable - row <- geoUnitMapping.get(excerptData.originId).toIterable - column <- geoUnitMapping.get(excerptData.destinationId).toIterable - metric <- matrixData.metrics + excerptData <- excerpts + row <- geoUnitMapping.get(excerptData.originId) + column <- geoUnitMapping.get(excerptData.destinationId) } { - val pathType = excerptData.pathType match { - case rideHailMode @ (TNC_SINGLE | TNC_SHARED) => - f"${rideHailMode.toString}_${excerptData.fleetName.toUpperCase}" - case _ => excerptData.pathType.toString - } - val matrix = getOrCreateMatrix(allMatrices, pathType, excerptData.timePeriodString, metric, shape) - matrix.setAttribute("mode", pathType) - matrix.setAttribute("timePeriod", excerptData.timePeriodString) - matrix.setAttribute("measure", metric.toString) matrix.getData()(row)(column) = excerptData.getValue(metric).toFloat * getUnitConversion(metric) } - allMatrices.values.foreach(omxFile.addMatrix) - // we cannot add a lookup because string arrays are not supported by hdf5lib java - // omxFile.addLookup(new OmxStringLookup("zone_id", geoUnits.toArray, "")) + + omxFile.addMatrix(matrix) } - // we write geo unit mapping as a csv file next to the omx file + + logger.info("Saving OMX file...") + omxFile.save() + logger.info("OMX file saved successfully") + omxFile.close() + logger.info("OMX file closed") + + // Write geo unit mapping as before CsvWriter(filePath + ".mapping", "zone_id").writeAllAndClose(geoUnits.map(Seq(_))) + } catch { + case e: java.io.FileNotFoundException => + e.printStackTrace() + throw new RuntimeException(s"Failed to create or access file at path: $filePath. 
Error: ${e.getMessage}", e) + case e: java.io.IOException => + e.printStackTrace() + throw new RuntimeException(s"IO error while writing to OMX file: ${e.getMessage}", e) + case e: IllegalArgumentException => + throw new RuntimeException(s"Invalid argument provided: ${e.getMessage}", e) + case e: NoSuchElementException => + throw new RuntimeException(s"Missing required data: ${e.getMessage}", e) + case e: OutOfMemoryError => + throw new RuntimeException(s"Insufficient memory to process the matrix data.", e) + case e: Exception => + throw new RuntimeException( + s"Unexpected error while writing OMX file: ${e.getMessage}. Error type: ${e.getClass.getSimpleName}", + e + ) } private def getUnitConversion(metric: ActivitySimMetric): Float = { @@ -72,22 +128,6 @@ object ActivitySimOmxWriter { } } - private def getOrCreateMatrix( - matrixMap: mutable.Map[String, OmxMatrix.OmxFloatMatrix], - pathType: String, - timeBin: String, - metric: ActivitySimMetric, - shape: Array[Int] - ): OmxMatrix.OmxFloatMatrix = { - val matrixName = s"${pathType}_${metric}__$timeBin" - matrixMap.getOrElseUpdate( - matrixName, { - val valuesFloat = Array.fill[Float](shape(0), shape(1))(Float.NaN) - new OmxMatrix.OmxFloatMatrix(matrixName, valuesFloat, -1.0f) - } - ) - } - /** * Contains data types that is used by ActivitySim: path types, time bins and metrics * @param pathTypes possible path types diff --git a/src/main/scala/beam/router/skim/urbansim/ActivitySimZarrWriter.scala b/src/main/scala/beam/router/skim/urbansim/ActivitySimZarrWriter.scala new file mode 100644 index 00000000000..2ba568fad49 --- /dev/null +++ b/src/main/scala/beam/router/skim/urbansim/ActivitySimZarrWriter.scala @@ -0,0 +1,211 @@ +package beam.router.skim.urbansim + +import beam.router.skim.ActivitySimMetric._ +import beam.router.skim.ActivitySimPathType._ +import beam.router.skim.ActivitySimSkimmer.ExcerptData +import beam.router.skim.{ActivitySimMetric, ActivitySimPathType, ActivitySimTimeBin} +import 
beam.router.skim.ActivitySimTimeBin._ +import com.bc.zarr.DataType +import com.typesafe.scalalogging.LazyLogging +import com.bc.zarr.storage.FileSystemStore + +import java.nio.file.Paths + +object ActivitySimZarrWriter extends LazyLogging { + + def writeToZarr( + filePath: String, + skimData: Iterator[ExcerptData], + geoUnits: Seq[String] + ): Unit = try { + logger.info(s"Starting writeToZarr with filePath: $filePath") + + // Build a map from path type to MatrixData for quick lookup + val pathTypeToMatrixData: Map[ActivitySimPathType, MatrixData] = + activitySimMatrixData.flatMap(md => md.pathTypes.map(_ -> md)).toMap + + val geoUnitMapping = geoUnits.zipWithIndex.toMap + val timePeriods = ActivitySimTimeBin.values.toIndexedSeq // Keep as enum values for index lookup + val timePeriodNames = timePeriods.map(_.entryName) + + val groupedData = skimData.toSeq.groupBy { excerptData => + val pathType = excerptData.pathType match { // Standardize TNC names with fleet suffix + case rideHailMode @ (TNC_SINGLE | TNC_SHARED) => + f"${rideHailMode.toString}_${excerptData.fleetName.toUpperCase}" + case _ => excerptData.pathType.toString + } + ( + pathType, + pathTypeToMatrixData.get(excerptData.pathType).map(_.metrics).getOrElse(Set.empty[ActivitySimMetric]) + ) + } + + // --- Zarr Directory Store Implementation using com.bc.zarr --- + + val store = new FileSystemStore(Paths.get(filePath)) + logger.info(s"Zarr Directory Store created/opened at: $filePath") + + var rootGroup: com.bc.zarr.ZarrGroup = null + try { + rootGroup = com.bc.zarr.ZarrGroup.create(store) + logger.info("Root Zarr group created successfully") + + var dataset_count = 0 + + groupedData.par.foreach { case ((pathType, metrics), excerpts) => + metrics.par.foreach { metric => + val matrixName = s"${pathType}_${metric}" + val shape = Array[Int](geoUnits.size, geoUnits.size, timePeriods.size) + logger.debug(s"Creating dataset '$matrixName' with shape ${shape.mkString("x")}") + dataset_count += 1 + + val compressor 
= com.bc.zarr.CompressorFactory.create( + "zlib" + ) +// val compressor = com.bc.zarr.CompressorFactory.create( +// "blosc", +// "cname", +// "zstd", +// "clevel", +// "5", +// "shuffle", +// "1" +// ) + val chunkShape = Array[Int](shape(0), shape(1), 1) + + val arrayParams = new com.bc.zarr.ArrayParams() + .shape(shape: _*) + .chunks(chunkShape: _*) + .dataType(DataType.f4) + .compressor(compressor) + .fillValue(Float.NaN) + + val zarrArray = rootGroup.createArray(matrixName, arrayParams) + + excerpts.foreach { excerptData => + for { + row <- geoUnitMapping.get(excerptData.originId) + column <- geoUnitMapping.get(excerptData.destinationId) + timeBinOpt = ActivitySimTimeBin.values.find(_.entryName == excerptData.timePeriodString) + if timeBinOpt.isDefined + timeIdx = timePeriods.indexOf(timeBinOpt.get) + if timeIdx >= 0 + } { + val offset = Array[Int](row, column, timeIdx) + val dataShape = Array[Int](1, 1, 1) // Single value shape + val value = excerptData.getValue(metric).toFloat * getUnitConversion(metric) + val javaFloatArray = Array[Float](value) // Create primitive float array directly + try { + zarrArray.write(javaFloatArray, dataShape, offset) + } catch { + case e: java.lang.RuntimeException => + val value = excerptData.getValue(metric) + val conversion = getUnitConversion(metric) + logger.info(s"Writing value: $value (${value.getClass.getName}) with conversion: $conversion") + logger.error(s"Failed to initialize data for $matrixName at offset $offset: ${e.getMessage}", e) + } + } + } + + val attrs = zarrArray.getAttributes() + attrs.put("mode", pathType) + attrs.put("measure", metric.toString) + attrs.put("timePeriods", timePeriodNames.toList) + // No attrs.write() needed + + logger.debug(s"Successfully wrote dataset and attributes for '$matrixName'") + } + } // ADD SECOND BLOCK HERE + + logger.info( + s"Zarr Directory Store written successfully with $dataset_count datasets." 
+ ) // Report actual dataset count + + } finally { + // No close method needed for rootGroup + } + + } catch { + case e: Exception => + logger.error(s"Unexpected error while writing Zarr file: ${e.getMessage}", e) + throw new RuntimeException( + s"Unexpected error while writing Zarr file: ${e.getMessage}. Error type: ${e.getClass.getSimpleName}", + e + ) + } + + private def getUnitConversion(metric: ActivitySimMetric): Float = { + metric match { + case DIST | DDIST => 1f / 1609.34f + case _ => 1f + } + } + + // --- Definitions copied from ActivitySimOmxWriter --- + // Contains data types that is used by ActivitySim: path types, time bins and metrics + // @param pathTypes possible path types + // @param timeBins we don't use time bins now because data can be defined for all time bins for all path types + // that Beam produces + // @param metrics possible metrics + case class MatrixData( + pathTypes: Set[ActivitySimPathType], + timeBins: Set[ActivitySimTimeBin], + metrics: Set[ActivitySimMetric] + ) + + // Configuration for which metrics are expected for which path types for ActivitySim export + private val activitySimMatrixData = IndexedSeq( + MatrixData( + Set(DRV_COM_WLK, DRV_EXP_WLK, DRV_HVY_WLK, WLK_COM_DRV, WLK_EXP_DRV, WLK_HVY_DRV), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FAR, XWAIT, KEYIVT, IWAIT, DTIM, BOARDS, DDIST, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(DRV_LOC_WLK, WLK_LOC_DRV), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FAR, XWAIT, IWAIT, DTIM, BOARDS, DDIST, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(DRV_LRF_WLK, WLK_LRF_DRV), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FERRYIVT, FAR, XWAIT, KEYIVT, DTIM, IWAIT, BOARDS, DDIST, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(HOV2TOLL, HOV3TOLL, SOVTOLL), + ActivitySimTimeBin.values.toSet, + Set(BTOLL, VTOLL, TIME, DIST) + ), + MatrixData( + Set(BIKE), + ActivitySimTimeBin.values.toSet, + Set(TIME, DIST) + ), + MatrixData(Set(HOV2, HOV3, SOV), 
ActivitySimTimeBin.values.toSet, Set(BTOLL, TIME, DIST, TRIPS, FAILURES)), + MatrixData( + Set(WLK_COM_WLK, WLK_EXP_WLK, WLK_HVY_WLK), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FAR, XWAIT, KEYIVT, IWAIT, BOARDS, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(WLK_LOC_WLK), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FAR, XWAIT, IWAIT, BOARDS, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(WLK_LRF_WLK), + ActivitySimTimeBin.values.toSet, + Set(TOTIVT, FERRYIVT, FAR, XWAIT, KEYIVT, IWAIT, BOARDS, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(WLK_TRN_WLK), + Set(PM_PEAK, MIDDAY, AM_PEAK), + Set(WACC, IVT, XWAIT, IWAIT, WEGR, WAUX, TRIPS, FAILURES) + ), + MatrixData( + Set(TNC_SINGLE, TNC_SHARED), + ActivitySimTimeBin.values.toSet, + Set(IWAIT, TOTIVT, DDIST, FAR, TRIPS, FAILURES) + ) + ) + // --- End of definitions copied from ActivitySimOmxWriter --- +} diff --git a/src/main/scala/beam/scoring/BeamScoringFunctionFactory.scala b/src/main/scala/beam/scoring/BeamScoringFunctionFactory.scala index d8d8f12f9cd..8e5013eae1d 100755 --- a/src/main/scala/beam/scoring/BeamScoringFunctionFactory.scala +++ b/src/main/scala/beam/scoring/BeamScoringFunctionFactory.scala @@ -11,6 +11,7 @@ import beam.sim.population.AttributesOfIndividual import beam.sim.population.PopulationAdjustment._ import beam.utils.{FileUtils, OutputDataDescriptor} import com.typesafe.scalalogging.LazyLogging +import org.apache.commons.lang3.math.NumberUtils import org.matsim.api.core.v01.events.{Event, PersonArrivalEvent} import org.matsim.api.core.v01.population.{Activity, Leg, Person} import org.matsim.core.controler.OutputDirectoryHierarchy @@ -149,7 +150,42 @@ class BeamScoringFunctionFactory @Inject() ( leg.getAttributes.putAttribute("vehicles", trip.vehiclesInTrip.mkString(",")) } - val allDayScore = modeChoiceCalculator.computeAllDayUtility(trips, person, attributes) + // TODO: Factor in ivt multipliers from ASim to downweight extra time on transit: +// ivt_cost_multiplier: 0.6 +// 
ivt_lrt_multiplier: 0.9 +// ivt_ferry_multiplier: 0.8 +// ivt_exp_multiplier: 1 +// ivt_hvy_multiplier: 0.8 +// ivt_com_multiplier: 0.7 + + val tripsWithUpdatedAttributes = trips + .zip(personLegs) + .map { case (x, y) => + x -> Map("travelTimeRatio" -> (Option(y.getAttributes.getAttribute("trip_dur_min")) match { + case Some(expectedTravelTime) => + x.totalTravelTimeInSecs.toDouble / 60.0 / NumberUtils.toDouble(expectedTravelTime.toString) + case None => + logger.debug(s"Missing expected travel time ratio for leg $y") + 1.0 + })) + } + .toMap + val allDayExpectedScore = if (beamConfig.beam.replanning.subtractExpectedScores) { + modeChoiceCalculator.computeAllDayUtility( + tripsWithUpdatedAttributes, + person, + attributes, + overrideAttributes = true + ) + } else { 0.0 } + val allDayScore = + modeChoiceCalculator.computeAllDayUtility( + tripsWithUpdatedAttributes, + person, + attributes, + overrideAttributes = false + ) + val personActivities = person.getSelectedPlan.getPlanElements.asScala .collect { case activity: Activity => activity @@ -159,12 +195,21 @@ class BeamScoringFunctionFactory @Inject() ( personActivities.foldLeft(0.0)(_ + getActivityBenefit(_, attributes)) } else { 0.0 } val replanningScore = -replanningEventCount.toFloat * beamConfig.beam.replanning.replanningPenaltyInDollars + val utilsConversion = beamConfig.beam.agentsim.agents.modalBehaviors.multinomialLogit.units.toLowerCase match { + case "utils" => + beamConfig.beam.agentsim.agents.modalBehaviors.multinomialLogit.params.time / attributes.valueOfTime * 60.0 // Convert hours to minutes + case "dollars" => 1.0 + } - finalScore = allDayScore + leavingParkingEventScore + activityScore + replanningScore - finalScore = Math.max( - finalScore, - -100000 - ) // keep scores no further below -100k to keep MATSim happy (doesn't like -Infinity) but knowing + finalScore = + (allDayScore + leavingParkingEventScore + activityScore + replanningScore - allDayExpectedScore) * utilsConversion + finalScore = if 
(finalScore.isNaN) { -1000 } + else { + Math.max( + finalScore, + -100000 + ) + } // keep scores no further below -100k to keep MATSim happy (doesn't like -Infinity) but knowing // that if changes to utility function drive the true scores below -100k, this will need to be replaced with another big number. // Write the individual's trip scores to csv diff --git a/src/main/scala/beam/sim/BeamHelper.scala b/src/main/scala/beam/sim/BeamHelper.scala index d86507daafe..4177de6fd93 100755 --- a/src/main/scala/beam/sim/BeamHelper.scala +++ b/src/main/scala/beam/sim/BeamHelper.scala @@ -398,6 +398,31 @@ trait BeamHelper extends LazyLogging with BeamValidationHelper { } } + def vehicleEnergy(beamConfig: BeamConfig, vehicleTypes: Map[Id[BeamVehicleType], BeamVehicleType]): VehicleEnergy = { + var vehiclePaths = IndexedSeq( + Paths.get(beamConfig.beam.agentsim.agents.vehicles.vehicleTypesFilePath).getParent.toString + ) + beamConfig.beam.agentsim.agents.freight.vehicleTypesFilePath + .map(freightVehicleTypesFilePath => Paths.get(freightVehicleTypesFilePath).getParent.toString) + .foreach(freightVehiclePath => vehiclePaths = vehiclePaths :+ freightVehiclePath) + + val vehicleCsvReader = new VehicleCsvReader(beamConfig) + val consumptionRateFilterStore = + new ConsumptionRateFilterStoreImpl( + vehicleCsvReader.getVehicleEnergyRecordsUsing, + vehiclePaths, + primaryConsumptionRateFilePathsByVehicleType = + vehicleTypes.values.map(x => (x, x.primaryVehicleEnergyFile)).toIndexedSeq, + secondaryConsumptionRateFilePathsByVehicleType = + vehicleTypes.values.map(x => (x, x.secondaryVehicleEnergyFile)).toIndexedSeq + ) + // TODO Fix me once `TrieMap` is removed + new VehicleEnergy( + consumptionRateFilterStore, + vehicleCsvReader.getLinkToGradeRecordsUsing + ) + } + private def readPrivateVehicles( beamConfig: BeamConfig, vehicleTypes: Map[Id[BeamVehicleType], BeamVehicleType] diff --git a/src/main/scala/beam/sim/BeamMobsim.scala b/src/main/scala/beam/sim/BeamMobsim.scala index 
2fbf93e6383..e807a2fe6ff 100755 --- a/src/main/scala/beam/sim/BeamMobsim.scala +++ b/src/main/scala/beam/sim/BeamMobsim.scala @@ -455,7 +455,8 @@ class BeamMobsimIteration( if (beamServices.beamConfig.beam.agentsim.agents.rideHail.managers.size == 1) { val managerConfig = beamConfig.beam.agentsim.agents.rideHail.managers.head val rhmName = managerConfig.name - val rideHailManagerId = VehicleManager.createOrGetReservedFor(rhmName, VehicleManager.TypeEnum.RideHail).managerId + val rideHailManagerId = + VehicleManager.createOrGetReservedFor(rhmName, Some(VehicleManager.TypeEnum.RideHail)).managerId val rideHailFleetInitializer = rideHailFleetInitializerProvider.get(rhmName) Props( new RideHailManager( diff --git a/src/main/scala/beam/sim/BeamSim.scala b/src/main/scala/beam/sim/BeamSim.scala index cdf1549b7aa..02b16af6994 100755 --- a/src/main/scala/beam/sim/BeamSim.scala +++ b/src/main/scala/beam/sim/BeamSim.scala @@ -35,6 +35,7 @@ import com.conveyal.r5.transit.TransportNetwork import com.google.inject.Inject import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging +import kamon.Kamon import org.apache.commons.lang3.StringUtils import org.jfree.data.category.DefaultCategoryDataset import org.matsim.api.core.v01.Scenario @@ -592,17 +593,31 @@ class BeamSim @Inject() ( "dumpMatsimStuffAtTheBeginningOfSimulation in the beginning of simulation", x => logger.info(x) ) { - // `DumpDataAtEnd` during `notifyShutdown` dumps network, plans, person attributes and other things. 
- // Reusing it to get `outputPersonAttributes.xml.gz` which is needed for warmstart - val dumper = beamServices.injector.getInstance(classOf[DumpDataAtEnd]) - dumper match { - case listener: ShutdownListener => - val event = new ShutdownEvent(beamServices.matsimServices, false) - // Create files - listener.notifyShutdown(event) - dumpHouseholdAttributes - - case _ => logger.warn(s"dumper is not `ShutdownListener` - $dumper") + // Get the specific logger and save its original level + val dumpLogger = org.apache.log4j.Logger.getLogger("org.matsim.core.controler.corelisteners.DumpDataAtEndImpl") + val originalLevel = dumpLogger.getLevel + + // Temporarily set log level to WARN to suppress ERROR messages + dumpLogger.setLevel(org.apache.log4j.Level.WARN) + + try { + val dumper = beamServices.injector.getInstance(classOf[DumpDataAtEnd]) + dumper match { + case listener: ShutdownListener => + val event = new ShutdownEvent(beamServices.matsimServices, false) + try { + // Create files + listener.notifyShutdown(event) + dumpHouseholdAttributes() + } catch { + case ex: Throwable => + logger.error(s"Exception during initial data dump: ${ex.getMessage}") + } + case _ => logger.warn(s"dumper is not `ShutdownListener` - $dumper") + } + } finally { + // Restore original logging configuration + dumpLogger.setLevel(originalLevel) } } } @@ -659,6 +674,7 @@ class BeamSim @Inject() ( logger.info("Actor system shut down") deleteMATSimOutputFiles(event.getServices.getIterationNumber) + Kamon.stopModules() // simulation python scripts for { diff --git a/src/main/scala/beam/sim/RideHailFleetInitializer.scala b/src/main/scala/beam/sim/RideHailFleetInitializer.scala index d081d27f51b..cf980452854 100644 --- a/src/main/scala/beam/sim/RideHailFleetInitializer.scala +++ b/src/main/scala/beam/sim/RideHailFleetInitializer.scala @@ -47,7 +47,7 @@ object RideHailFleetInitializer extends OutputDataDescriptor with LazyLogging { val id = GenericCsvReader.getIfNotNull(rec, "id") val 
rideHailManagerIdStr = GenericCsvReader.getIfNotNull(rec, "rideHailManagerId") val rideHailManagerId = - VehicleManager.createOrGetReservedFor(rideHailManagerIdStr, VehicleManager.TypeEnum.RideHail).managerId + VehicleManager.createOrGetReservedFor(rideHailManagerIdStr, Some(VehicleManager.TypeEnum.RideHail)).managerId val vehicleType = GenericCsvReader.getIfNotNull(rec, "vehicleType") val initialLocationX = GenericCsvReader.getIfNotNull(rec, "initialLocationX").toDouble val initialLocationY = GenericCsvReader.getIfNotNull(rec, "initialLocationY").toDouble @@ -629,15 +629,15 @@ class ProceduralRideHailFleetInitializer( val realDistribution: UniformRealDistributionEnhanced = new UniformRealDistributionEnhanced() realDistribution.reseedRandomGenerator(beamServices.beamConfig.matsim.modules.global.randomSeed) - val passengerPopulation: Iterable[Person] = scenario.getPopulation.getPersons + private val passengerPopulation: Iterable[Person] = scenario.getPopulation.getPersons .values() .asScala - .filterNot(_.getId.toString.startsWith(FreightReader.FREIGHT_ID_PREFIX)) + .filterNot(_.getId.toString.startsWith(FreightReader.CARRIER_ID_PREFIX)) - val passengerHousehold: Iterable[Household] = scenario.getHouseholds.getHouseholds + private val passengerHousehold: Iterable[Household] = scenario.getHouseholds.getHouseholds .values() .asScala - .filterNot(_.getId.toString.startsWith(FreightReader.FREIGHT_ID_PREFIX)) + .filterNot(_.getId.toString.startsWith(FreightReader.CARRIER_ID_PREFIX)) private def computeNumRideHailAgents: Long = { val fleet: Double = beamServices.beamConfig.beam.agentsim.agents.vehicles.fractionOfInitialVehicleFleet @@ -652,6 +652,12 @@ class ProceduralRideHailFleetInitializer( } .count(beamVehicleType => beamVehicleType.vehicleCategory == VehicleCategory.Car) / fleet + logger.info( + s"Manager: ${managerConfig.name}: Number of household vehicles: $initialNumHouseholdVehicles, " + + s"fraction of initial vehicle fleet: 
${managerConfig.initialization.procedural.fractionOfInitialVehicleFleet}, " + + s"ride hail agents to be generated: ${math.round(initialNumHouseholdVehicles * managerConfig.initialization.procedural.fractionOfInitialVehicleFleet)}" + ) + math.round( initialNumHouseholdVehicles * managerConfig.initialization.procedural.fractionOfInitialVehicleFleet @@ -665,7 +671,14 @@ class ProceduralRideHailFleetInitializer( val averageOnDutyHoursPerDay = managerConfig.initialization.procedural.averageOnDutyHoursPerDay val meanLogShiftDurationHours = managerConfig.initialization.procedural.meanLogShiftDurationHours val stdLogShiftDurationHours = managerConfig.initialization.procedural.stdLogShiftDurationHours - var equivalentNumberOfDrivers = managerConfig.initialization.procedural.equivalentNumberOfDrivers + var equivalentNumberOfDrivers = if (managerConfig.initialization.procedural.equivalentNumberOfDrivers >= 0) { + managerConfig.initialization.procedural.equivalentNumberOfDrivers + } else { + logger.warn( + s"Equivalent number of drivers is set to ${managerConfig.initialization.procedural.equivalentNumberOfDrivers}, setting it to 0 instead" + ) + 0 + } val personsWithMoreThanOneActivity = passengerPopulation.filter(_.getSelectedPlan.getPlanElements.size > 1) val persons: Array[Person] = rand.shuffle(personsWithMoreThanOneActivity).toArray @@ -687,60 +700,80 @@ class ProceduralRideHailFleetInitializer( val rideHailAgentInitializers: ArrayBuffer[RideHailFleetInitializer.RideHailAgentInitializer] = new ArrayBuffer() var idx = 0 val numRideHailAgents = computeNumRideHailAgents + var warned = false while (equivalentNumberOfDrivers < numRideHailAgents.toDouble) { - if (idx >= persons.length) { - throw new IllegalStateException("Can't have more ridehail drivers than total population") - } else { - try { - val person = persons(idx) - val vehicleType = vehiclesAdjustment - .sampleVehicleTypes( - numVehicles = 1, - vehicleCategory = VehicleCategory.Car, - realDistribution + if ((idx >= 
persons.length) && !warned) { + logger.warn( + s"We need ${numRideHailAgents.toDouble} ridehail agents, which is more than total population of ${persons.length}" + ) + logger.info(s"Current ratio of drivers to agents is $idx agents, $equivalentNumberOfDrivers drivers") + warned = true + } + try { + val person = persons(idx % persons.length) + val vehicleType = vehiclesAdjustment + .sampleVehicleTypes( + numVehicles = 1, + vehicleCategory = VehicleCategory.Car, + realDistribution + ) + .head + val rideInitialLocation: Location = getRideInitLocation(person, activityQuadTreeBounds) + + val meanSoc = beamServices.beamConfig.beam.agentsim.agents.vehicles.meanRidehailVehicleStartingSOC + val initialStateOfCharge = + beam.utils.BeamVehicleUtils.randomSocFromUniformDistribution(rand, vehicleType, meanSoc) + + val (shiftsOpt, shiftEquivalentNumberOfDrivers) = if (vehicleType.isConnectedAutomatedVehicle) { + (None, 1.0) + } else { + val shiftDuration = + math.round(math.exp(rand.nextGaussian() * stdLogShiftDurationHours + meanLogShiftDurationHours) * 3600) + val shiftMidPointTime = activityEndTimes(rand.nextInt(activityEndTimes.length)) + val shiftStartTime = max(shiftMidPointTime - (shiftDuration / 2).toInt, 10) + val shiftEndTime = min(shiftMidPointTime + (shiftDuration / 2).toInt, 30 * 3600) + + val shiftEquivalentNumberOfDrivers_ = (shiftEndTime - shiftStartTime) / (averageOnDutyHoursPerDay * 3600) + if (shiftEquivalentNumberOfDrivers_ < 0.0) { + logger.warn( + s"How did we end up with a negative equivalent number of drivers? 
" + + s"shiftStartTime: $shiftStartTime, shiftEndTime: $shiftEndTime, shiftDuration: $shiftDuration" ) - .head - val rideInitialLocation: Location = getRideInitLocation(person, activityQuadTreeBounds) - - val meanSoc = beamServices.beamConfig.beam.agentsim.agents.vehicles.meanRidehailVehicleStartingSOC - val initialStateOfCharge = - beam.utils.BeamVehicleUtils.randomSocFromUniformDistribution(rand, vehicleType, meanSoc) - - val (shiftsOpt, shiftEquivalentNumberOfDrivers) = if (vehicleType.isConnectedAutomatedVehicle) { - (None, 1.0) + (Some(List(Shift(Range(shiftStartTime, shiftEndTime), None))), 1.0) } else { - val shiftDuration = - math.round(math.exp(rand.nextGaussian() * stdLogShiftDurationHours + meanLogShiftDurationHours) * 3600) - val shiftMidPointTime = activityEndTimes(rand.nextInt(activityEndTimes.length)) - val shiftStartTime = max(shiftMidPointTime - (shiftDuration / 2).toInt, 10) - val shiftEndTime = min(shiftMidPointTime + (shiftDuration / 2).toInt, 30 * 3600) - - val shiftEquivalentNumberOfDrivers_ = (shiftEndTime - shiftStartTime) / (averageOnDutyHoursPerDay * 3600) - (Some(List(Shift(Range(shiftStartTime, shiftEndTime), None))), shiftEquivalentNumberOfDrivers_) } - val rideHailAgentInitializer = RideHailAgentInitializer( - person.getId.toString, - vehicleType, - rideHailManagerId, - shiftsOpt, - initialStateOfCharge, - rideInitialLocation, - geofence = None, - fleetId = managerConfig.name - ) + } - rideHailAgentInitializers += rideHailAgentInitializer + val rideHailAgentInitializer = RideHailAgentInitializer( + person.getId.toString, + vehicleType, + rideHailManagerId, + shiftsOpt, + initialStateOfCharge, + rideInitialLocation, + geofence = None, + fleetId = managerConfig.name + ) - equivalentNumberOfDrivers += shiftEquivalentNumberOfDrivers - } catch { - case ex: Throwable => - logger.error(s"Could not generate RideHailAgentInitializer: ${ex.getMessage}") - throw ex - } - idx += 1 + rideHailAgentInitializers += rideHailAgentInitializer + + 
equivalentNumberOfDrivers += shiftEquivalentNumberOfDrivers + } catch { + case ex: Throwable => + logger.error(s"Could not generate RideHailAgentInitializer: ${ex.getMessage}") + throw ex } + idx += 1 + } + + if (warned) { + logger.warn( + s"Generated $equivalentNumberOfDrivers ride hail agents for $idx shifts " + + s"for $rideHailManagerId, which is more than the total " + + s"population of ${persons.length}." + ) } rideHailAgentInitializers.toIndexedSeq @@ -924,7 +957,7 @@ case class ShpGeofence( geometries.exists(_.contains(point)) } - override def toString() = { + override def toString(): String = { s"ShpGeofence(${geometries.size} features from file: $geofenceShpFile)" } diff --git a/src/main/scala/beam/sim/common/GeoUtils.scala b/src/main/scala/beam/sim/common/GeoUtils.scala index cd037f307c2..98383815410 100755 --- a/src/main/scala/beam/sim/common/GeoUtils.scala +++ b/src/main/scala/beam/sim/common/GeoUtils.scala @@ -29,6 +29,7 @@ trait GeoUtils extends ExponentialLazyLogging { def localCRS: String val defaultMaxRadiusForMapSearch = 20000 private lazy val notExponentialLogger = Logger(LoggerFactory.getLogger(getClass.getName)) + private var cachedEdges: Option[Array[(EdgeWithCoord, GpxPoint)]] = None lazy val utm2Wgs: GeotoolsTransformation = new GeotoolsTransformation(localCRS, "EPSG:4326") @@ -95,7 +96,9 @@ trait GeoUtils extends ExponentialLazyLogging { distUTMInMeters(matsimUtmCoord, wgs2Utm(coordWGS)) } val distUTM = distUTMInMeters(wgs2Utm(coordWGS), wgs2Utm(new v01.Coord(closest.wgsCoord.x, closest.wgsCoord.y))) - notExponentialLogger.warn(s"""Will return closest to the corner: $closest which is $distUTM meters far away""") + notExponentialLogger.warn( + s"""Will return closest to the corner: $closest which is $distUTM meters far away from request at $coordWGS""" + ) closest.edgeIndex } else { theSplit.edge @@ -141,7 +144,7 @@ trait GeoUtils extends ExponentialLazyLogging { theSplit = streetLayer.findSplit(coord.getY, coord.getX, maxRadius, 
streetMode) } if (theSplit == null) { - notExponentialLogger.warn( + notExponentialLogger.debug( s"The split is `null` for StreetLayer.BoundingBox: ${streetLayer.getEnvelope}, coord: $coord, maxRadius: $maxRadius, street mode $streetMode" ) } @@ -149,25 +152,27 @@ trait GeoUtils extends ExponentialLazyLogging { } def getEdgesCloseToBoundingBox(streetLayer: StreetLayer): Array[(EdgeWithCoord, GpxPoint)] = { - val cursor = streetLayer.edgeStore.getCursor() - val iter = new Iterator[EdgeStore#Edge] { - override def hasNext: Boolean = cursor.advance() + cachedEdges.getOrElse { - override def next(): EdgeStore#Edge = cursor - } + val cursor = streetLayer.edgeStore.getCursor() + val iter = new Iterator[EdgeStore#Edge] { + override def hasNext: Boolean = cursor.advance() + + override def next(): EdgeStore#Edge = cursor + } - val boundingBox = streetLayer.envelope + val boundingBox = streetLayer.envelope - val insideBoundingBox = iter - .flatMap { edge => - Option(edge.getGeometry.getBoundary.getCoordinate).map { coord => - EdgeWithCoord(edge.getEdgeIndex, coord) + val insideBoundingBox = iter + .flatMap { edge => + Option(edge.getGeometry.getBoundary.getCoordinate).map { coord => + EdgeWithCoord(edge.getEdgeIndex, coord) + } } - } - .withFilter(x => boundingBox.contains(x.wgsCoord)) - .toArray + .withFilter(x => boundingBox.contains(x.wgsCoord)) + .toArray - /* + /* min => x0,y0 max => x1,y1 x0,y1 (TOP LEFT) ._____._____. x1,y1 (TOP RIGHT) @@ -177,37 +182,39 @@ x0,y1 (TOP LEFT) ._____._____. x1,y1 (TOP RIGHT) | | | | x0,y0 (BOTTOM LEFT) ._____._____. 
x1, y0 (BOTTOM RIGHT) - */ - - val bottomLeft = new Coord(boundingBox.getMinX, boundingBox.getMinY) - val topLeft = new Coord(boundingBox.getMinX, boundingBox.getMaxY) - val topRight = new Coord(boundingBox.getMaxX, boundingBox.getMaxY) - val bottomRight = new Coord(boundingBox.getMaxX, boundingBox.getMinY) - val midLeft = new Coord((bottomLeft.getX + topLeft.getX) / 2, (bottomLeft.getY + topLeft.getY) / 2) - val midTop = new Coord((topLeft.getX + topRight.getX) / 2, (topLeft.getY + topRight.getY) / 2) - val midRight = new Coord((topRight.getX + bottomRight.getX) / 2, (topRight.getY + bottomRight.getY) / 2) - val midBottom = new Coord((bottomLeft.getX + bottomRight.getX) / 2, (bottomLeft.getY + bottomRight.getY) / 2) - - val corners = Array( - GpxPoint("BottomLeft", bottomLeft), - GpxPoint("TopLeft", topLeft), - GpxPoint("TopRight", topRight), - GpxPoint("BottomRight", bottomRight), - GpxPoint("MidLeft", midLeft), - GpxPoint("MidTop", midTop), - GpxPoint("MidRight", midRight), - GpxPoint("MidBottom", midBottom) - ) + */ + + val bottomLeft = new Coord(boundingBox.getMinX, boundingBox.getMinY) + val topLeft = new Coord(boundingBox.getMinX, boundingBox.getMaxY) + val topRight = new Coord(boundingBox.getMaxX, boundingBox.getMaxY) + val bottomRight = new Coord(boundingBox.getMaxX, boundingBox.getMinY) + val midLeft = new Coord((bottomLeft.getX + topLeft.getX) / 2, (bottomLeft.getY + topLeft.getY) / 2) + val midTop = new Coord((topLeft.getX + topRight.getX) / 2, (topLeft.getY + topRight.getY) / 2) + val midRight = new Coord((topRight.getX + bottomRight.getX) / 2, (topRight.getY + bottomRight.getY) / 2) + val midBottom = new Coord((bottomLeft.getX + bottomRight.getX) / 2, (bottomLeft.getY + bottomRight.getY) / 2) + + val corners = Array( + GpxPoint("BottomLeft", bottomLeft), + GpxPoint("TopLeft", topLeft), + GpxPoint("TopRight", topRight), + GpxPoint("BottomRight", bottomRight), + GpxPoint("MidLeft", midLeft), + GpxPoint("MidTop", midTop), + GpxPoint("MidRight", 
midRight), + GpxPoint("MidBottom", midBottom) + ) - val closestEdges = corners.map { gpxPoint => - val utmCornerCoord = wgs2Utm(gpxPoint.wgsCoord) - val closestEdge: EdgeWithCoord = insideBoundingBox.minBy { x => - val utmCoord = wgs2Utm(new Coord(x.wgsCoord.x, x.wgsCoord.y)) - distUTMInMeters(utmCornerCoord, utmCoord) + val closestEdges = corners.map { gpxPoint => + val utmCornerCoord = wgs2Utm(gpxPoint.wgsCoord) + val closestEdge: EdgeWithCoord = insideBoundingBox.minBy { x => + val utmCoord = wgs2Utm(new Coord(x.wgsCoord.x, x.wgsCoord.y)) + distUTMInMeters(utmCornerCoord, utmCoord) + } + (closestEdge, gpxPoint) } - (closestEdge, gpxPoint) + cachedEdges = Some(closestEdges) + closestEdges } - closestEdges } } diff --git a/src/main/scala/beam/sim/config/BeamConfig.scala b/src/main/scala/beam/sim/config/BeamConfig.scala index a1402848c27..34836b9ea18 100644 --- a/src/main/scala/beam/sim/config/BeamConfig.scala +++ b/src/main/scala/beam/sim/config/BeamConfig.scala @@ -45,6 +45,7 @@ object BeamConfig { fractionOfPlansWithSingleActivity: scala.Double, h3taz: BeamConfig.Beam.Agentsim.H3taz, lastIteration: scala.Int, + lastTransitTrip: java.lang.String, populationAdjustment: java.lang.String, randomSeedForPopulationSampling: scala.Option[scala.Int], scenarios: BeamConfig.Beam.Agentsim.Scenarios, @@ -618,6 +619,7 @@ object BeamConfig { case class MultinomialLogit( params: BeamConfig.Beam.Agentsim.Agents.ModalBehaviors.MultinomialLogit.Params, + units: java.lang.String, utility_scale_factor: scala.Double ) @@ -633,6 +635,7 @@ object BeamConfig { ride_hail_pooled_intercept: scala.Double, ride_hail_subscription: scala.Double, ride_hail_transit_intercept: scala.Double, + time: scala.Double, transfer: scala.Double, transit_crowding: scala.Double, transit_crowding_VOT_multiplier: scala.Double, @@ -665,6 +668,7 @@ object BeamConfig { ride_hail_transit_intercept = if (c.hasPathOrNull("ride_hail_transit_intercept")) c.getDouble("ride_hail_transit_intercept") else 0.0, + time = if 
(c.hasPathOrNull("time")) c.getDouble("time") else 0.022, transfer = if (c.hasPathOrNull("transfer")) c.getDouble("transfer") else -1.4, transit_crowding = if (c.hasPathOrNull("transit_crowding")) c.getDouble("transit_crowding") else 0.0, transit_crowding_VOT_multiplier = @@ -692,6 +696,7 @@ object BeamConfig { if (c.hasPathOrNull("params")) c.getConfig("params") else com.typesafe.config.ConfigFactory.parseString("params{}") ), + units = if (c.hasPathOrNull("units")) c.getString("units") else "dollars", utility_scale_factor = if (c.hasPathOrNull("utility_scale_factor")) c.getDouble("utility_scale_factor") else 1.0 ) @@ -2439,6 +2444,7 @@ object BeamConfig { else com.typesafe.config.ConfigFactory.parseString("h3taz{}") ), lastIteration = if (c.hasPathOrNull("lastIteration")) c.getInt("lastIteration") else 0, + lastTransitTrip = if (c.hasPathOrNull("lastTransitTrip")) c.getString("lastTransitTrip") else "28:00:00", populationAdjustment = if (c.hasPathOrNull("populationAdjustment")) c.getString("populationAdjustment") else "DEFAULT_ADJUSTMENT", randomSeedForPopulationSampling = @@ -2881,7 +2887,11 @@ object BeamConfig { case class Output( activity_sim_skimmer: scala.Option[BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer], - emissions: BeamConfig.Beam.Exchange.Output.Emissions + activitySimSkimsEnabled: scala.Boolean, + emissions: BeamConfig.Beam.Exchange.Output.Emissions, + generateSkimsForAllModes: scala.Boolean, + generateSkimsForRideHailTransit: scala.Boolean, + sendNonChosenTripsToSkimmer: scala.Boolean ) object Output { @@ -2907,7 +2917,7 @@ object BeamConfig { } case class Secondary( - beamModeFilter: scala.List[java.lang.String], + beamModeFilter: scala.Option[scala.List[java.lang.String]], enabled: scala.Boolean, taz: BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary.Taz ) @@ -2945,8 +2955,8 @@ object BeamConfig { c: com.typesafe.config.Config ): BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary.Taz = { 
BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary.Taz( - filePath = c.getString("filePath"), - tazIdFieldName = c.getString("tazIdFieldName"), + filePath = if (c.hasPathOrNull("filePath")) c.getString("filePath") else "''", + tazIdFieldName = if (c.hasPathOrNull("tazIdFieldName")) c.getString("tazIdFieldName") else "''", tazMapping = if (c.hasPathOrNull("tazMapping")) scala.Some( @@ -2960,7 +2970,8 @@ object BeamConfig { def apply(c: com.typesafe.config.Config): BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary = { BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary( - beamModeFilter = $_L$_str(c.getList("beamModeFilter")), + beamModeFilter = + if (c.hasPathOrNull("beamModeFilter")) scala.Some($_L$_str(c.getList("beamModeFilter"))) else None, enabled = c.hasPathOrNull("enabled") && c.getBoolean("enabled"), taz = BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer.Secondary.Taz( if (c.hasPathOrNull("taz")) c.getConfig("taz") @@ -3009,10 +3020,18 @@ object BeamConfig { if (c.hasPathOrNull("activity-sim-skimmer")) scala.Some(BeamConfig.Beam.Exchange.Output.ActivitySimSkimmer(c.getConfig("activity-sim-skimmer"))) else None, + activitySimSkimsEnabled = + c.hasPathOrNull("activitySimSkimsEnabled") && c.getBoolean("activitySimSkimsEnabled"), emissions = BeamConfig.Beam.Exchange.Output.Emissions( if (c.hasPathOrNull("emissions")) c.getConfig("emissions") else com.typesafe.config.ConfigFactory.parseString("emissions{}") - ) + ), + generateSkimsForAllModes = + c.hasPathOrNull("generateSkimsForAllModes") && c.getBoolean("generateSkimsForAllModes"), + generateSkimsForRideHailTransit = + c.hasPathOrNull("generateSkimsForRideHailTransit") && c.getBoolean("generateSkimsForRideHailTransit"), + sendNonChosenTripsToSkimmer = + !c.hasPathOrNull("sendNonChosenTripsToSkimmer") || c.getBoolean("sendNonChosenTripsToSkimmer") ) } } @@ -4243,7 +4262,8 @@ object BeamConfig { fractionOfIterationsToDisableInnovation: scala.Double, maxAgentPlanMemorySize: 
scala.Int, planSelectionBeta: scala.Double, - replanningPenaltyInDollars: scala.Double + replanningPenaltyInDollars: scala.Double, + subtractExpectedScores: scala.Boolean ) object Replanning { @@ -4287,7 +4307,8 @@ object BeamConfig { if (c.hasPathOrNull("maxAgentPlanMemorySize")) c.getInt("maxAgentPlanMemorySize") else 5, planSelectionBeta = if (c.hasPathOrNull("planSelectionBeta")) c.getDouble("planSelectionBeta") else 1.0, replanningPenaltyInDollars = - if (c.hasPathOrNull("replanningPenaltyInDollars")) c.getDouble("replanningPenaltyInDollars") else 100.0 + if (c.hasPathOrNull("replanningPenaltyInDollars")) c.getDouble("replanningPenaltyInDollars") else 100.0, + subtractExpectedScores = !c.hasPathOrNull("subtractExpectedScores") || c.getBoolean("subtractExpectedScores") ) } } @@ -4532,6 +4553,7 @@ object BeamConfig { numberOfSamples: scala.Int, osmMapdbFile: java.lang.String, suboptimalMinutes: scala.Int, + suboptimalMinutesForDriveAccess: scala.Int, transitAlternativeList: java.lang.String, travelTimeNoiseFraction: scala.Double ) @@ -4617,6 +4639,9 @@ object BeamConfig { if (c.hasPathOrNull("osmMapdbFile")) c.getString("osmMapdbFile") else "/test/input/beamville/r5/osm.mapdb", suboptimalMinutes = if (c.hasPathOrNull("suboptimalMinutes")) c.getInt("suboptimalMinutes") else 10, + suboptimalMinutesForDriveAccess = + if (c.hasPathOrNull("suboptimalMinutesForDriveAccess")) c.getInt("suboptimalMinutesForDriveAccess") + else 2, transitAlternativeList = if (c.hasPathOrNull("transitAlternativeList")) c.getString("transitAlternativeList") else "SUBOPTIMAL", travelTimeNoiseFraction = diff --git a/src/main/scala/beam/sim/population/PopulationAttributes.scala b/src/main/scala/beam/sim/population/PopulationAttributes.scala index 2858b36fbda..1fdc6448986 100644 --- a/src/main/scala/beam/sim/population/PopulationAttributes.scala +++ b/src/main/scala/beam/sim/population/PopulationAttributes.scala @@ -31,6 +31,9 @@ case class AttributesOfIndividual( wheelchairUser: Boolean = 
false ) extends PopulationAttributes { lazy val hasModalityStyle: Boolean = modalityStyle.nonEmpty + private val hourConversion = 1.0 / 3600 + private val modeMultiplierCache = collection.concurrent.TrieMap[BeamMode, Double]() + private val poolingMultiplierCache = collection.concurrent.TrieMap[AutomationLevel, Double]() val busTransit: Set[BeamMode] = Set(BeamMode.BUS, BeamMode.WALK) val subwayTransit: Set[BeamMode] = Set(BeamMode.SUBWAY, BeamMode.WALK) @@ -48,38 +51,41 @@ case class AttributesOfIndividual( isRideHail: Boolean = false, isPooledTrip: Boolean = false ): Double = { - // NOTE: This is in hours - val isWorkTrip = destinationActivity match { - case None => - false - case Some(activity) => - activity.getType().equalsIgnoreCase("work") - } + val (linkId, travelTime) = IdAndTT + val isWorkTrip = destinationActivity.exists(_.getType.equalsIgnoreCase("work")) - val multiplier = beamMode match { - case CAR => - val vehicleAutomationLevel = getAutomationLevel(beamVehicleTypeId, beamServices) - if (isRideHail) { - if (isPooledTrip) { - getModeVotMultiplier(Option(RIDE_HAIL_POOLED), modeChoiceModel) * - getPooledFactor(vehicleAutomationLevel, modeChoiceModel.poolingMultipliers) - } else { - getModeVotMultiplier(Option(RIDE_HAIL), modeChoiceModel) - } - } else { - getSituationMultiplier( - IdAndTT._1, - IdAndTT._2, - isWorkTrip, - modeChoiceModel.situationMultipliers(beamMode), + val multiplier = if (beamMode == CAR) { + val vehicleAutomationLevel = getAutomationLevel(beamVehicleTypeId, beamServices) + if (isRideHail) { + if (isPooledTrip) { + modeMultiplierCache.getOrElseUpdate( + RIDE_HAIL_POOLED, + modeChoiceModel.modeMultipliers.getOrElse(Some(RIDE_HAIL_POOLED), 1.0) + ) * poolingMultiplierCache.getOrElseUpdate( vehicleAutomationLevel, - beamServices - ) * getModeVotMultiplier(Option(CAR), modeChoiceModel) + modeChoiceModel.poolingMultipliers.getOrElse(vehicleAutomationLevel, 1.0) + ) + } else { + modeMultiplierCache.getOrElseUpdate( + RIDE_HAIL, + 
modeChoiceModel.modeMultipliers.getOrElse(Some(RIDE_HAIL), 1.0) + ) } - case _ => - getModeVotMultiplier(Option(beamMode), modeChoiceModel) + } else { + getSituationMultiplier( + linkId, + travelTime, + isWorkTrip, + modeChoiceModel.situationMultipliers(beamMode), + vehicleAutomationLevel, + beamServices + ) * modeMultiplierCache.getOrElseUpdate(CAR, modeChoiceModel.modeMultipliers.getOrElse(Some(CAR), 1.0)) + } + } else { + modeMultiplierCache.getOrElseUpdate(beamMode, modeChoiceModel.modeMultipliers.getOrElse(Some(beamMode), 1.0)) } - multiplier * IdAndTT._2 / 3600 + + multiplier * travelTime * hourConversion } def getGeneralizedTimeOfLegForMNL( @@ -173,23 +179,29 @@ case class AttributesOfIndividual( homeToWork || workToHome } + private val automationLevelCache = collection.concurrent.TrieMap[Id[BeamVehicleType], AutomationLevel]() + private def getAutomationLevel( beamVehicleTypeId: Id[BeamVehicleType], beamServices: BeamServices ): AutomationLevel = { - val automationInt = if (beamServices.beamConfig.beam.agentsim.agents.modalBehaviors.overrideAutomationForVOTT) { - beamServices.beamConfig.beam.agentsim.agents.modalBehaviors.overrideAutomationLevel - } else { - beamServices.beamScenario.vehicleTypes(beamVehicleTypeId).automationLevel - } - automationInt match { - case 1 => levelLE2 - case 2 => levelLE2 - case 3 => level3 - case 4 => level4 - case 5 => level5 - case _ => levelLE2 - } + automationLevelCache.getOrElseUpdate( + beamVehicleTypeId, { + val automationInt = if (beamServices.beamConfig.beam.agentsim.agents.modalBehaviors.overrideAutomationForVOTT) { + beamServices.beamConfig.beam.agentsim.agents.modalBehaviors.overrideAutomationLevel + } else { + beamServices.beamScenario.vehicleTypes(beamVehicleTypeId).automationLevel + } + automationInt match { + case 1 => levelLE2 + case 2 => levelLE2 + case 3 => level3 + case 4 => level4 + case 5 => level5 + case _ => levelLE2 + } + } + ) } // Convert from seconds to hours and bring in person's base VOT diff --git 
a/src/main/scala/beam/sim/vehiclesharing/Fleets.scala b/src/main/scala/beam/sim/vehiclesharing/Fleets.scala index 84b06b711eb..377a2ceb66e 100644 --- a/src/main/scala/beam/sim/vehiclesharing/Fleets.scala +++ b/src/main/scala/beam/sim/vehiclesharing/Fleets.scala @@ -7,7 +7,7 @@ import beam.sim.config.BeamConfig.Beam.Agentsim.Agents.Vehicles.SharedFleets$Elm object Fleets { def lookup(config: BeamConfig.Beam.Agentsim.Agents.Vehicles.SharedFleets$Elm): FleetType = { - val vehicleManager = VehicleManager.createOrGetReservedFor(config.name, VehicleManager.TypeEnum.Shared) + val vehicleManager = VehicleManager.createOrGetReservedFor(config.name, Some(VehicleManager.TypeEnum.Shared)) val parkingFilePath = config.parkingFilePath config.managerType match { case "fixed-non-reserving-fleet-by-taz" => diff --git a/src/main/scala/beam/utils/BeamVehicleUtils.scala b/src/main/scala/beam/utils/BeamVehicleUtils.scala index 9f7a3477a17..74c9e61df60 100755 --- a/src/main/scala/beam/utils/BeamVehicleUtils.scala +++ b/src/main/scala/beam/utils/BeamVehicleUtils.scala @@ -81,6 +81,7 @@ object BeamVehicleUtils extends LazyLogging { 9000 // Class 4-6 (GVWR 14001-26000 lbs. => 6000-15000, and average of 8000-9000 lbs curb weight) case VehicleCategory.Class78Vocational => 13000 // CLass 7&8 (GVWR 26001 to >33,001 lbs.) case VehicleCategory.Class78Tractor => 20000 // CLass 7&8 (GVWR 26001 to >33,001 lbs.) 
+// case VehicleCategory.AnyCategory => 0 } def readBeamVehicleTypeFile(filePath: String): Map[Id[BeamVehicleType], BeamVehicleType] = { diff --git a/src/main/scala/beam/utils/DateUtils.scala b/src/main/scala/beam/utils/DateUtils.scala index dae0bda30f5..e4a17faa4d9 100755 --- a/src/main/scala/beam/utils/DateUtils.scala +++ b/src/main/scala/beam/utils/DateUtils.scala @@ -27,6 +27,11 @@ object DateUtils { timeAr(0).toInt * 3600 + timeAr(1).toInt * 60 + timeAr(2).toInt } + def getLastTransitTripTime(beamConfig: beam.sim.config.BeamConfig): Int = { + val timeAr = beamConfig.beam.agentsim.lastTransitTrip.split(":") + timeAr(0).toInt * 3600 + timeAr(1).toInt * 60 + timeAr(2).toInt + } + def getMaxHour(beamConfig: beam.sim.config.BeamConfig): Int = Math .ceil( diff --git a/src/main/scala/beam/utils/EventReader.scala b/src/main/scala/beam/utils/EventReader.scala index d826442b3b2..24aa828a2cf 100644 --- a/src/main/scala/beam/utils/EventReader.scala +++ b/src/main/scala/beam/utils/EventReader.scala @@ -1,23 +1,19 @@ package beam.utils -import java.io._ -import java.net.URL -import java.nio.charset.StandardCharsets -import java.util -import java.util.zip.GZIPInputStream - import beam.agentsim.events._ import org.matsim.api.core.v01.events.{Event, GenericEvent} import org.matsim.core.api.experimental.events.EventsManager import org.matsim.core.config.Config import org.matsim.core.events.handler.BasicEventHandler import org.matsim.core.events.{EventsUtils, MatsimEventsReader} -import org.matsim.core.utils.io.UnicodeInputStream import org.supercsv.io.CsvMapReader import org.supercsv.prefs.CsvPreference +import java.io._ +import java.net.URL +import java.util +import java.util.zip.GZIPInputStream import scala.collection.mutable.ArrayBuffer -import scala.reflect.ClassTag class DummyEvent(attribs: java.util.Map[String, String]) extends Event(attribs.get("time").toDouble) { override def getEventType: String = attribs.get("type") diff --git 
a/src/main/scala/beam/utils/csv/readers/BeamCsvScenarioReader.scala b/src/main/scala/beam/utils/csv/readers/BeamCsvScenarioReader.scala index cdd002e78ee..9214816e805 100644 --- a/src/main/scala/beam/utils/csv/readers/BeamCsvScenarioReader.scala +++ b/src/main/scala/beam/utils/csv/readers/BeamCsvScenarioReader.scala @@ -101,6 +101,8 @@ object BeamCsvScenarioReader extends BeamScenarioReader with ExponentialLazyLogg legMode = Option(rec.get("legMode")), legDepartureTime = Option(rec.get("legDepartureTime")), legTravelTime = Option(rec.get("legTravelTime")), + legExpectedTravelTime = Option(rec.get("trip_dur_min")).map(_.toDouble), + legExpectedCost = Option(rec.get("trip_cost_dollars")).map(_.toDouble), legRouteType = Option(rec.get("legRouteType")), legRouteStartLink = Option(rec.get("legRouteStartLink")), legRouteEndLink = Option(rec.get("legRouteEndLink")), diff --git a/src/main/scala/beam/utils/csv/writers/PlansCsvWriter.scala b/src/main/scala/beam/utils/csv/writers/PlansCsvWriter.scala index d864a3af320..66522cb3c33 100755 --- a/src/main/scala/beam/utils/csv/writers/PlansCsvWriter.scala +++ b/src/main/scala/beam/utils/csv/writers/PlansCsvWriter.scala @@ -25,6 +25,8 @@ object PlansCsvWriter extends ScenarioCsvWriter { "activityEndTime", "legMode", "legDepartureTime", + "trip_dur_min", + "trip_cost_dollars", "legTravelTime", "legRouteType", "legRouteStartLink", @@ -96,6 +98,12 @@ object PlansCsvWriter extends ScenarioCsvWriter { legMode = mode, legDepartureTime = leg.getDepartureTime.toOption.map(_.toString), legTravelTime = leg.getTravelTime.toOption.map(_.toString), + legExpectedTravelTime = + Option(leg.getAttributes.getAttribute("trip_dur_min")).map(_.toString).filterNot(_.isEmpty).map(_.toDouble), + legExpectedCost = Option(leg.getAttributes.getAttribute("trip_cost_dollars")) + .map(_.toString) + .filterNot(_.isEmpty) + .map(_.toDouble), legRouteType = route.map(_.getRouteType), legRouteStartLink = route.map(_.getStartLinkId.toString), legRouteEndLink = 
route.map(_.getEndLinkId.toString), @@ -121,6 +129,8 @@ object PlansCsvWriter extends ScenarioCsvWriter { legMode = None, legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, @@ -158,6 +168,8 @@ object PlansCsvWriter extends ScenarioCsvWriter { planInfo.activityEndTime.map(_.toString).getOrElse(""), planInfo.legMode.getOrElse(""), planInfo.legDepartureTime.getOrElse(""), + planInfo.legExpectedTravelTime.getOrElse(""), + planInfo.legExpectedCost.getOrElse(""), planInfo.legTravelTime.getOrElse(""), planInfo.legRouteType.getOrElse(""), planInfo.legRouteStartLink.getOrElse(""), diff --git a/src/main/scala/beam/utils/scenario/BeamScenarioLoader.scala b/src/main/scala/beam/utils/scenario/BeamScenarioLoader.scala index 0b88db20014..5de041b77f2 100644 --- a/src/main/scala/beam/utils/scenario/BeamScenarioLoader.scala +++ b/src/main/scala/beam/utils/scenario/BeamScenarioLoader.scala @@ -379,8 +379,7 @@ object BeamScenarioLoader extends ExponentialLazyLogging { val beamVehicleType = map(beamVehicleTypeId) - val vehicleManagerId = - VehicleManager.createOrGetReservedFor(info.householdId, VehicleManager.TypeEnum.Household).managerId + val vehicleManagerId = VehicleManager.createOrGetReservedFor(info.householdId).managerId val powerTrain = new Powertrain(beamVehicleType.primaryFuelConsumptionInJoulePerMeter) new BeamVehicle( beamVehicleId, diff --git a/src/main/scala/beam/utils/scenario/Models.scala b/src/main/scala/beam/utils/scenario/Models.scala index c90a4197a54..d93e9f57334 100644 --- a/src/main/scala/beam/utils/scenario/Models.scala +++ b/src/main/scala/beam/utils/scenario/Models.scala @@ -56,6 +56,8 @@ case class PlanElement( legMode: Option[String], legDepartureTime: Option[String], legTravelTime: Option[String], + legExpectedTravelTime: Option[Double], + legExpectedCost: Option[Double], legRouteType: Option[String], legRouteStartLink: 
Option[String], legRouteEndLink: Option[String], diff --git a/src/main/scala/beam/utils/scenario/UrbanSimScenarioLoader.scala b/src/main/scala/beam/utils/scenario/UrbanSimScenarioLoader.scala index 3c0f970302b..e26f431d3b4 100644 --- a/src/main/scala/beam/utils/scenario/UrbanSimScenarioLoader.scala +++ b/src/main/scala/beam/utils/scenario/UrbanSimScenarioLoader.scala @@ -19,6 +19,7 @@ import org.matsim.core.population.routes.{NetworkRoute, RouteUtils} import org.matsim.core.scenario.MutableScenario import org.matsim.households._ import org.matsim.vehicles.{Vehicle, VehicleType, VehicleUtils} +import beam.utils.OptionalUtils.OptionalTimeExtension import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -54,10 +55,16 @@ class UrbanSimScenarioLoader( private def buildAndAddLegToPlan(currentPlan: Plan, planElement: PlanElement): Leg = { val leg = PopulationUtils.createAndAddLeg(currentPlan, planElement.legMode.getOrElse("")) planElement.legDepartureTime.foreach(v => leg.setDepartureTime(v.toDouble)) - planElement.legTravelTime.foreach(v => leg.setTravelTime(v.toDouble)) + planElement.legTravelTime.foreach(travelTimeStr => { + val travelTime = travelTimeStr.toDouble + if (travelTime == beam.UNDEFINED_TIME) leg.setTravelTimeUndefined() + else leg.setTravelTime(travelTime) + }) planElement.legMode.foreach(v => leg.setMode(v)) leg.getAttributes.putAttribute("trip_id", planElement.tripId) leg.getAttributes.putAttribute("tour_id", planElement.tourId) + leg.getAttributes.putAttribute("trip_dur_min", planElement.legExpectedTravelTime.getOrElse("")) + leg.getAttributes.putAttribute("trip_cost_dollars", planElement.legExpectedCost.getOrElse("")) val legRoute: NetworkRoute = { val links = planElement.legRouteLinks.map(v => Id.create(v, classOf[Link])).asJava diff --git a/src/main/scala/beam/utils/scenario/generic/readers/PlanElementReader.scala b/src/main/scala/beam/utils/scenario/generic/readers/PlanElementReader.scala index 
9560885dc46..e51063d066c 100644 --- a/src/main/scala/beam/utils/scenario/generic/readers/PlanElementReader.scala +++ b/src/main/scala/beam/utils/scenario/generic/readers/PlanElementReader.scala @@ -70,6 +70,8 @@ object CsvPlanElementReader extends PlanElementReader { legMode = Option(rec.get("legMode")), legDepartureTime = Option(rec.get("legDepartureTime")), legTravelTime = Option(rec.get("legTravelTime")), + legExpectedTravelTime = Option(rec.get("trip_dur_min")).map(_.toDouble), + legExpectedCost = Option(rec.get("trip_cost_dollars")).map(_.toDouble), legRouteType = Option(rec.get("legRouteType")), legRouteStartLink = Option(rec.get("legRouteStartLink")), legRouteEndLink = Option(rec.get("legRouteEndLink")), @@ -96,8 +98,10 @@ object XmlPlanElementReader extends PlanElementReader { } } .collect { - case (person, plan, planIdx, act: Activity, planElIdx) => toPlanElement(act, plan, planIdx, person, planElIdx) - case (person, plan, planIdx, leg: Leg, planElIdx) => toPlanElement(leg, plan, planIdx, person, planElIdx) + case (person, plan, planIdx, act: Activity, planElIdx) if act != null => + toPlanElement(act, plan, planIdx, person, planElIdx) + case (person, plan, planIdx, leg: Leg, planElIdx) if leg != null => + toPlanElement(leg, plan, planIdx, person, planElIdx) } .toArray } @@ -162,6 +166,8 @@ object XmlPlanElementReader extends PlanElementReader { legMode = None, legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, @@ -190,9 +196,17 @@ object XmlPlanElementReader extends PlanElementReader { activityLocationX = None, activityLocationY = None, activityEndTime = None, - legMode = Option(leg.getMode), + legMode = Option(leg).map(_.getMode), legDepartureTime = leg.getDepartureTime.toOption.map(_.toString), legTravelTime = leg.getTravelTime.toOption.map(_.toString), + legExpectedTravelTime = 
Option(leg.getAttributes.getAttribute("trip_dur_min")) + .filter(_.toString.nonEmpty) + .map(_.toString) + .map(_.toDouble), + legExpectedCost = Option(leg.getAttributes.getAttribute("trip_cost_dollars")) + .filter(_.toString.nonEmpty) + .map(_.toString) + .map(_.toDouble), legRouteType = Option(leg.getRoute).map(_.getRouteType), legRouteStartLink = Option(leg.getRoute).map(_.getStartLinkId.toString), legRouteEndLink = Option(leg.getRoute).map(_.getEndLinkId.toString), diff --git a/src/main/scala/beam/utils/scenario/urbansim/HOVModeTransformer.scala b/src/main/scala/beam/utils/scenario/urbansim/HOVModeTransformer.scala index af690c36fd4..0d83ddd9beb 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/HOVModeTransformer.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/HOVModeTransformer.scala @@ -37,6 +37,15 @@ import scala.util.Random */ object HOVModeTransformer extends ExponentialLazyLogging { + private val summaryStats = mutable.Map[String, Int]( + "forcedHOV2Teleports" -> 0, + "forcedHOV3Teleports" -> 0, + "forcedCarHOV2Count" -> 0, + "forcedCarHOV3Count" -> 0, + "randomHOV2Choices" -> 0, + "randomHOV3Choices" -> 0 + ) + def reseedRandomGenerator(randomSeed: Int): Unit = rand.setSeed(randomSeed) private implicit val rand: Random = new Random(42) @@ -65,13 +74,12 @@ object HOVModeTransformer extends ExponentialLazyLogging { def transformHOVtoHOVCARorHOVTeleportation( plansProbablyWithHOV: Iterable[PlanElement] ): Iterable[PlanElement] = { - val allHOVUsers: Set[PersonId] = plansProbablyWithHOV - .filter(planElement => { - val legMode = planElement.legMode.map(_.toLowerCase) - legMode.contains(hov2) || legMode.contains(hov3) - }) - .map(_.personId) - .toSet + + val allHOVUsers: Set[PersonId] = + plansProbablyWithHOV + .filter(x => x.legMode.exists(m => m.contains(hov2) || m.contains(hov3))) + .map(_.personId) + .toSet var forcedHOV2Teleports = 0 var forcedHOV3Teleports = 0 @@ -91,10 +99,12 @@ object HOVModeTransformer extends ExponentialLazyLogging { 
trip.map { case hov2Leg if itIsAnHOV2Leg(hov2Leg) => forcedHOV2Teleports -= 1 + summaryStats("forcedCarHOV2Count") += 1 hov2Leg.copy(legMode = Some(CAR_HOV2.value)) case hov3Leg if itIsAnHOV3Leg(hov3Leg) => // as car_hov3 contains two passengers, reduce by 2 forcedHOV3Teleports -= 2 + summaryStats("forcedCarHOV3Count") += 1 hov3Leg.copy(legMode = Some(CAR_HOV3.value)) case other => other } @@ -104,9 +114,11 @@ object HOVModeTransformer extends ExponentialLazyLogging { trip.map { case hov2Leg if itIsAnHOV2Leg(hov2Leg) => forcedCarHOV2Count -= 1 + summaryStats("forcedHOV2Teleports") += 1 hov2Leg.copy(legMode = Some(HOV2_TELEPORTATION.value)) case hov3Leg if itIsAnHOV3Leg(hov3Leg) => forcedCarHOV3Count -= 1 + summaryStats("forcedHOV3Teleports") += 1 hov3Leg.copy(legMode = Some(HOV3_TELEPORTATION.value)) case other => other } @@ -119,23 +131,39 @@ object HOVModeTransformer extends ExponentialLazyLogging { val (mappedTrip, forcedHOV2, forcedHOV3) = mapToForcedHOVTeleportation(trip) forcedHOV2Teleports += forcedHOV2 forcedHOV3Teleports += forcedHOV3 + summaryStats("forcedHOV2Teleports") += forcedHOV2 + summaryStats("forcedHOV3Teleports") += forcedHOV3 mappedTrip } else if (isForcedCarHOVTrip(trip)) { val (mappedTrip, forcedHOV2, forcedHOV3) = mapToForcedCarHOVTrip(trip) forcedCarHOV2Count += forcedHOV2 forcedCarHOV3Count += forcedHOV3 + summaryStats("forcedCarHOV2Count") += forcedHOV2 + summaryStats("forcedCarHOV3Count") += forcedHOV3 mappedTrip } else if (thereAreMoreHOVTeleportations) { replaceHOVwithCar(trip) } else if (thereAreMoreHOVCars) { replaceHOVwithTeleportation(trip) } else { - mapRandomHOVTeleportationOrCar(trip) + val transformedTrip = mapRandomHOVTeleportationOrCar(trip) + transformedTrip.foreach { + case leg if itIsAnHOV2Leg(leg) => summaryStats("randomHOV2Choices") += 1 + case leg if itIsAnHOV3Leg(leg) => summaryStats("randomHOV3Choices") += 1 + case _ => + } + transformedTrip } } else { trip } } + + // Log summary at the end + logger.info( + s"Summary 
of HOV transformation: ${summaryStats.map { case (k, v) => s"$k: $v" }.mkString(", ")}" + ) + // we need to merge plans without creating duplicates of home activity for persons with more than one trip val plans = joinTripsIntoPlans(tripsTransformed) plans @@ -252,12 +280,12 @@ object HOVModeTransformer extends ExponentialLazyLogging { def itIsAnHOV2Leg(planElement: PlanElement): Boolean = { planElement.planElementType == PlanElement.Leg && - planElement.legMode.exists(legMode => legMode.toLowerCase == hov2) + planElement.legMode.exists(legMode => legMode.contains(hov2)) } def itIsAnHOV3Leg(planElement: PlanElement): Boolean = { planElement.planElementType == PlanElement.Leg && - planElement.legMode.exists(legMode => legMode.toLowerCase == hov3) + planElement.legMode.exists(legMode => legMode.contains(hov3)) } def itIsASOVLeg(planElement: PlanElement): Boolean = { diff --git a/src/main/scala/beam/utils/scenario/urbansim/UrbanSimScenarioSource.scala b/src/main/scala/beam/utils/scenario/urbansim/UrbanSimScenarioSource.scala index f09eb951225..57d70efa3e7 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/UrbanSimScenarioSource.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/UrbanSimScenarioSource.scala @@ -75,6 +75,8 @@ class UrbanSimScenarioSource( // TODO: DataExchange.PlanElement does not have the following leg information legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/InputPlanElement.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/InputPlanElement.scala index eeff65464ba..fdad8d6b379 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/InputPlanElement.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/InputPlanElement.scala @@ -14,7 +14,9 @@ case class InputPlanElement( 
ActivityType: Option[String], x: Option[Double], y: Option[Double], - departureTime: Option[Double] + departureTime: Option[Double], + expectedDurationMinutes: Option[Double] = None, + expectedCostDollars: Option[Double] = None ) object InputPlanElement extends EntityTransformer[InputPlanElement] { @@ -30,6 +32,8 @@ object InputPlanElement extends EntityTransformer[InputPlanElement] { val xWgs = getOptional(m, "x").map(_.toDouble) val yWgs = getOptional(m, "y").map(_.toDouble) val departureTime = getOptional(m, "departure_time").map(_.toDouble) + val expectedDurationMinutes = getOptional(m, "trip_dur_min").map(_.toDouble) + val expectedCostDollars = getOptional(m, "trip_cost_dollars").map(_.toDouble) InputPlanElement( tripId, @@ -41,7 +45,9 @@ object InputPlanElement extends EntityTransformer[InputPlanElement] { activityType, xWgs, yWgs, - departureTime + departureTime, + expectedDurationMinutes, + expectedCostDollars ) } } diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/PersonInfo.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/PersonInfo.scala index 3f91f9eca98..9b1269cefe6 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/PersonInfo.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/entities/PersonInfo.scala @@ -1,8 +1,8 @@ package beam.utils.scenario.urbansim.censusblock.entities import java.util - import beam.utils.scenario.urbansim.censusblock.EntityTransformer +import org.apache.commons.lang3.math.NumberUtils import scala.annotation.switch @@ -31,7 +31,8 @@ case class InputPersonInfo( householdId: String, age: Int, sex: Sex, - industry: Option[String] + industry: Option[String], + valueOfTime: Option[Double] ) object InputPersonInfo extends EntityTransformer[InputPersonInfo] { @@ -42,7 +43,8 @@ object InputPersonInfo extends EntityTransformer[InputPersonInfo] { val age = getIfNotNull(rec, "age").toInt val sex = Sex.determineSex(getIfNotNull(rec, "sex").toInt) 
val industry = Option(rec.get("industry")) + val valueOfTime = Option(NumberUtils.toDouble(rec.get("value_of_time"))) - InputPersonInfo(personId, householdId, age, sex, industry) + InputPersonInfo(personId, householdId, age, sex, industry, valueOfTime) } } diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PersonMerger.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PersonMerger.scala index d0cee5b0497..b80bed9c1de 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PersonMerger.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PersonMerger.scala @@ -8,8 +8,9 @@ class PersonMerger(inputHousehold: Map[String, InputHousehold]) extends Merger[I override def merge(iter: Iterator[InputPersonInfo]): Iterator[PersonInfo] = iter.map(inputToOutput) private def inputToOutput(inputPersonInfo: InputPersonInfo): PersonInfo = { - val inputIncome = inputHousehold(inputPersonInfo.householdId).income - val income = PopulationAdjustment.incomeToValueOfTime(inputIncome).getOrElse(0d) + val valueOfTime = inputPersonInfo.valueOfTime.getOrElse( + PopulationAdjustment.incomeToValueOfTime(inputHousehold(inputPersonInfo.householdId).income).getOrElse(0d) + ) PersonInfo( personId = PersonId(inputPersonInfo.personId), @@ -19,7 +20,7 @@ class PersonMerger(inputHousehold: Map[String, InputHousehold]) extends Merger[I excludedModes = Seq.empty, rideHailServiceSubscription = Seq.empty, isFemale = inputPersonInfo.sex.isFemale, - valueOfTime = income, + valueOfTime = valueOfTime, industry = inputPersonInfo.industry ) } diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PlanMerger.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PlanMerger.scala index a7b10ac2ef5..851df0bb685 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PlanMerger.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/merger/PlanMerger.scala @@ 
-24,6 +24,8 @@ class PlanMerger(modeMap: Map[String, String]) extends Merger[InputPlanElement, inputPlanElement.tripMode.map(convertMode), legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = inputPlanElement.expectedDurationMinutes, + legExpectedCost = inputPlanElement.expectedCostDollars, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/CsvReaders.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/CsvReaders.scala index 77c5dacccf5..a1a44760f4c 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/CsvReaders.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/CsvReaders.scala @@ -18,7 +18,8 @@ class CsvPersonReader(path: String) extends BaseCsvReader[InputPersonInfo](path) householdId = record.get("household_id"), age = record.get("age").toInt, sex = Sex.determineSex(record.get("sex").toInt), - industry = Option(record.get("industry")) + industry = Option(record.get("industry")), + valueOfTime = Option(record.get("value_of_time")).map(_.toDouble) ) } } @@ -40,7 +41,9 @@ class CsvPlanReader(path: String) extends BaseCsvReader[InputPlanElement](path) ActivityType = Option(record.get("ActivityType")), x = Option(record.get("x")).map(_.toDouble), y = Option(record.get("y")).map(_.toDouble), - departureTime = Option(record.get("departure_time")).map(_.toDouble) + departureTime = Option(record.get("departure_time")).map(_.toDouble), + expectedDurationMinutes = Option(record.get("trip_dur_min")).map(_.toString.toDouble), + expectedCostDollars = Option(record.get("trip_cost_dollars")).map(_.toString.toDouble) ) } } @@ -54,7 +57,7 @@ class CsvHouseholdReader(path: String) extends BaseCsvReader[InputHousehold](pat InputHousehold( householdId = record.get("household_id"), cars = Try(record.get("cars").toInt).getOrElse(record.get("auto_ownership").toInt), - income = record.get("income").toInt, + 
income = Math.round(record.get("income").toFloat), blockId = record.get("block_id").toLong ) } diff --git a/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/ParquetReaders.scala b/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/ParquetReaders.scala index abdcba51da8..e440dbfcb49 100644 --- a/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/ParquetReaders.scala +++ b/src/main/scala/beam/utils/scenario/urbansim/censusblock/reader/ParquetReaders.scala @@ -21,7 +21,8 @@ class ParquetPersonReader(path: String) extends BaseParquetReader[InputPersonInf householdId = record.get("household_id").toString.split("\\.")(0), age = record.get("age").toString.toDouble.toInt, sex = Sex.determineSex(record.get("sex").toString.toDouble.toInt), - industry = industryField + industry = industryField, + valueOfTime = Try(record.get("value_of_time")).map(_.toString.toDouble).toOption // TODO: probably a better way ) } } @@ -36,11 +37,13 @@ class ParquetPlanReader(path: String) extends BaseParquetReader[InputPlanElement personId = personId, planElementIndex = record.get("PlanElementIndex").toString.toInt, activityElement = ActivityType.determineActivity(record.get("ActivityElement").toString), - tripMode = Option(record.get("trip_mode")).map(_.toString), - ActivityType = Option(record.get("ActivityType")).map(_.toString), + tripMode = Option(record.get("trip_mode")).map(_.toString).filterNot(_ == "nan"), + ActivityType = Option(record.get("ActivityType")).map(_.toString).filterNot(_ == "nan"), x = Option(record.get("x")).map(_.toString.toDouble), y = Option(record.get("y")).map(_.toString.toDouble), - departureTime = Option(record.get("departure_time")).map(_.toString.toDouble) + departureTime = Option(record.get("departure_time")).map(_.toString.toDouble), + expectedDurationMinutes = Option(record.get("trip_dur_min")).map(_.toString.toDouble), + expectedCostDollars = Option(record.get("trip_cost_dollars")).map(_.toString.toDouble) ) } } diff 
--git a/src/main/scala/scripts/NetworkRelaxationScenarioGenerator.scala b/src/main/scala/scripts/NetworkRelaxationScenarioGenerator.scala index 9758814c821..37f544dd1bf 100644 --- a/src/main/scala/scripts/NetworkRelaxationScenarioGenerator.scala +++ b/src/main/scala/scripts/NetworkRelaxationScenarioGenerator.scala @@ -42,11 +42,15 @@ class NetworkRelaxationScenarioGenerator { householdId = i.toString, age = 20 + Random.nextInt(50), sex = if (Random.nextBoolean()) Male else Female, + None, None ) } val csvWriter = - new CsvWriter(scenarioDir.getPath + "/persons.csv.gz", Seq("person_id", "household_id", "age", "sex")) + new CsvWriter( + scenarioDir.getPath + "/persons.csv.gz", + Seq("person_id", "household_id", "age", "sex", "value_of_time") + ) try { persons.foreach { person => csvWriter.write( @@ -56,7 +60,8 @@ class NetworkRelaxationScenarioGenerator { person.sex match { case Male => 1 case Female => 2 - } + }, + person.valueOfTime.getOrElse("") ) } } finally { diff --git a/src/main/scala/scripts/R5Requester.scala b/src/main/scala/scripts/R5Requester.scala index 5ace76c5f69..bbd0405373f 100644 --- a/src/main/scala/scripts/R5Requester.scala +++ b/src/main/scala/scripts/R5Requester.scala @@ -7,6 +7,7 @@ import beam.router.BeamRouter.{Location, RoutingRequest} import beam.router.Modes.BeamMode import beam.router.r5.{R5Parameters, R5Wrapper} import beam.router.{BeamRouter, FreeFlowTravelTime} +import beam.router.BeamTravelTime import beam.sim.BeamHelper import beam.sim.common.GeoUtils import beam.sim.population.{AttributesOfIndividual, HouseholdAttributes} @@ -368,6 +369,8 @@ object R5Requester extends BeamHelper { None, None, None, + None, + None, Seq.empty, None ) diff --git a/src/main/scala/scripts/calibration/ExperimentRunner.scala b/src/main/scala/scripts/calibration/ExperimentRunner.scala index 463535efc34..9279edb0b17 100755 --- a/src/main/scala/scripts/calibration/ExperimentRunner.scala +++ b/src/main/scala/scripts/calibration/ExperimentRunner.scala @@ -85,7 
+85,7 @@ case class ExperimentRunner()(implicit experimentData: SigoptExperimentData) ext } if (objectiveFunctionClassName.equals("CountsObjectiveFunction")) { val outpath = Paths.get( - ioController.getIterationFilename(runConfig.controler().getLastIteration, "countscompare.txt") + ioController.getIterationFilename(runConfig.controler().getLastIteration, "countsCompare.txt") ) CountsObjectiveFunction.evaluateFromRun(outpath.toAbsolutePath.toString) } else if (objectiveFunctionClassName.equals("ModeChoiceObjectiveFunction_RMSPE") && benchmarkFileExists) { @@ -120,7 +120,7 @@ case class ExperimentRunner()(implicit experimentData: SigoptExperimentData) ext ) } else if (objectiveFunctionClassName.equals("ModeChoiceAndCountsObjectiveFunction") && benchmarkFileExists) { var outpath = Paths.get( - ioController.getIterationFilename(runConfig.controler().getLastIteration, "countscompare.txt") + ioController.getIterationFilename(runConfig.controler().getLastIteration, "countsCompare.txt") ) val countsObjVal = CountsObjectiveFunction.evaluateFromRun(outpath.toAbsolutePath.toString) diff --git a/src/main/scala/scripts/shape/FilterPointsInShapeFile.scala b/src/main/scala/scripts/shape/FilterPointsInShapeFile.scala index 8a59ed052ff..611c34bf6b0 100644 --- a/src/main/scala/scripts/shape/FilterPointsInShapeFile.scala +++ b/src/main/scala/scripts/shape/FilterPointsInShapeFile.scala @@ -34,8 +34,8 @@ object FilterPointsInShapeFile { val points = planReader .iterator() .flatMap { - case InputPlanElement(_, _, personId, _, _, _, _, Some(x), Some(y), _) => Some(personId, x, y) - case _ => None + case InputPlanElement(_, _, personId, _, _, _, _, Some(x), Some(y), _, _, _) => Some(personId, x, y) + case _ => None } .map { case (personId, x, y) => (personId, gf.createPoint(new Coordinate(x, y))) } diff --git a/src/main/scala/scripts/synthpop/PumaLevelScenarioGenerator.scala b/src/main/scala/scripts/synthpop/PumaLevelScenarioGenerator.scala index 67c83abb0c7..4922ac069e7 100644 --- 
a/src/main/scala/scripts/synthpop/PumaLevelScenarioGenerator.scala +++ b/src/main/scala/scripts/synthpop/PumaLevelScenarioGenerator.scala @@ -73,6 +73,8 @@ class PumaLevelScenarioGenerator( legMode = None, legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, diff --git a/src/main/scala/scripts/synthpop/ScenarioGenerator.scala b/src/main/scala/scripts/synthpop/ScenarioGenerator.scala index 069cb2b4506..49fa0bc7733 100644 --- a/src/main/scala/scripts/synthpop/ScenarioGenerator.scala +++ b/src/main/scala/scripts/synthpop/ScenarioGenerator.scala @@ -98,6 +98,8 @@ class SimpleScenarioGenerator( legMode = None, legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, diff --git a/src/test/java/beam/analysis/plots/FuelUsageGraphTest.java b/src/test/java/beam/analysis/plots/FuelUsageGraphTest.java index 0e4b534df49..29686853ee3 100755 --- a/src/test/java/beam/analysis/plots/FuelUsageGraphTest.java +++ b/src/test/java/beam/analysis/plots/FuelUsageGraphTest.java @@ -25,7 +25,7 @@ private static class FuelUsageHandler implements BasicEventHandler { @Override public void handleEvent(Event event) { - if (event instanceof PathTraversalEvent ) { + if (event instanceof PathTraversalEvent) { fuelUsageStats.processStats(event); } } @@ -49,7 +49,7 @@ public void setUpClass() { @Test public void testShouldPassShouldReturnPathTraversalEventCarFuel() { - long expectedResult = 675705873L; + long expectedResult = 675705883L; int maxHour = getMaxHour(stats.keySet()); long actualResult = getFuelageHoursDataCountOccurrenceAgainstMode(CAR, maxHour, stats); assertEquals(expectedResult, actualResult); @@ -57,7 +57,7 @@ public void testShouldPassShouldReturnPathTraversalEventCarFuel() { @Test public void testShouldPassShouldReturnPathTraversalBusFuel() { - 
long expectedResult = 135249995867L; + long expectedResult = 135249993216L; int maxHour = getMaxHour(stats.keySet()); long actualResult = getFuelageHoursDataCountOccurrenceAgainstMode(BUS, maxHour, stats); assertEquals(expectedResult, actualResult); diff --git a/src/test/scala/beam/agentsim/agents/PersonAgentSpec.scala b/src/test/scala/beam/agentsim/agents/PersonAgentSpec.scala index b466ead3190..f4325f928dd 100644 --- a/src/test/scala/beam/agentsim/agents/PersonAgentSpec.scala +++ b/src/test/scala/beam/agentsim/agents/PersonAgentSpec.scala @@ -405,7 +405,7 @@ class PersonAgentSpec population = population, household = household, vehicles = Map(), - fallbackHomeCoord = new Coord(0.0, 0.0), + fallbackInitialLocationCoord = new Coord(0.0, 0.0), Vector(), Set.empty, new RouteHistory(beamConfig), diff --git a/src/test/scala/beam/agentsim/agents/PersonAndTransitDriverSpec.scala b/src/test/scala/beam/agentsim/agents/PersonAndTransitDriverSpec.scala index aa3930bf762..d1d92590d5b 100644 --- a/src/test/scala/beam/agentsim/agents/PersonAndTransitDriverSpec.scala +++ b/src/test/scala/beam/agentsim/agents/PersonAndTransitDriverSpec.scala @@ -300,7 +300,7 @@ class PersonAndTransitDriverSpec population = population, household = household, vehicles = Map(), - fallbackHomeCoord = new Coord(0.0, 0.0), + fallbackInitialLocationCoord = new Coord(0.0, 0.0), Vector(), Set.empty, new RouteHistory(beamConfig), diff --git a/src/test/scala/beam/agentsim/agents/PersonWithVehicleSharingSpec.scala b/src/test/scala/beam/agentsim/agents/PersonWithVehicleSharingSpec.scala index 8082e1c18d1..e20bb2b34ee 100644 --- a/src/test/scala/beam/agentsim/agents/PersonWithVehicleSharingSpec.scala +++ b/src/test/scala/beam/agentsim/agents/PersonWithVehicleSharingSpec.scala @@ -154,7 +154,8 @@ class PersonWithVehicleSharingSpec mockSharedVehicleFleet.expectMsgType[MobilityStatusInquiry] val vehicleType = beamScenario.vehicleTypes(Id.create("sharedVehicle-sharedCar", classOf[BeamVehicleType])) - val managerId 
= VehicleManager.createOrGetReservedFor("shared-fleet-1", VehicleManager.TypeEnum.Shared).managerId + val managerId = + VehicleManager.createOrGetReservedFor("shared-fleet-1", Some(VehicleManager.TypeEnum.Shared)).managerId // I give it a car to use. val vehicle = new BeamVehicle( vehicleId, @@ -315,7 +316,8 @@ class PersonWithVehicleSharingSpec val vehicleType = beamScenario.vehicleTypes(Id.create("sharedVehicle-sharedCar", classOf[BeamVehicleType])) // I give it a car to use. - val managerId = VehicleManager.createOrGetReservedFor("shared-fleet-1", VehicleManager.TypeEnum.Shared).managerId + val managerId = + VehicleManager.createOrGetReservedFor("shared-fleet-1", Some(VehicleManager.TypeEnum.Shared)).managerId val vehicle = new BeamVehicle( vehicleId, new Powertrain(0.0), diff --git a/src/test/scala/beam/agentsim/agents/ridehail/MatchingAlgorithmsForRideHailSpec.scala b/src/test/scala/beam/agentsim/agents/ridehail/MatchingAlgorithmsForRideHailSpec.scala index f924fc24ab0..d89bdc62064 100644 --- a/src/test/scala/beam/agentsim/agents/ridehail/MatchingAlgorithmsForRideHailSpec.scala +++ b/src/test/scala/beam/agentsim/agents/ridehail/MatchingAlgorithmsForRideHailSpec.scala @@ -230,7 +230,7 @@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) @@ -245,7 +245,7 @@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) @@ -314,7 +314,7 @@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) @@ -329,7 +329,7 
@@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) @@ -424,7 +424,7 @@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) @@ -439,7 +439,7 @@ object MatchingAlgorithmsForRideHailSpec { VehicleManager .createOrGetReservedFor( services.beamConfig.beam.agentsim.agents.rideHail.managers.head.name, - VehicleManager.TypeEnum.RideHail + Some(VehicleManager.TypeEnum.RideHail) ) .managerId ) diff --git a/src/test/scala/beam/agentsim/agents/vehicles/LinkStateOfChargeSpec.scala b/src/test/scala/beam/agentsim/agents/vehicles/LinkStateOfChargeSpec.scala index 32c40835be1..0b24a8d28f1 100644 --- a/src/test/scala/beam/agentsim/agents/vehicles/LinkStateOfChargeSpec.scala +++ b/src/test/scala/beam/agentsim/agents/vehicles/LinkStateOfChargeSpec.scala @@ -90,7 +90,8 @@ class LinkStateOfChargeSpec extends AnyWordSpecLike with Matchers with BeamHelpe events .collectFirst { case pte: PathTraversalEvent if pte.vehicleId == vehicleId => - pte.endLegPrimaryFuelLevel + pte.primaryFuelConsumed + pte.endLegPrimaryFuelLevel.asInstanceOf[Double] + pte.primaryFuelConsumed.asInstanceOf[Double] + } .getOrElse(Double.NaN) } diff --git a/src/test/scala/beam/agentsim/infrastructure/HierarchicalParkingManagerSpec.scala b/src/test/scala/beam/agentsim/infrastructure/HierarchicalParkingManagerSpec.scala index 2dd4eb2da78..b011ac5cc7c 100644 --- a/src/test/scala/beam/agentsim/infrastructure/HierarchicalParkingManagerSpec.scala +++ b/src/test/scala/beam/agentsim/infrastructure/HierarchicalParkingManagerSpec.scala @@ -47,8 +47,8 @@ class HierarchicalParkingManagerSpec val randomSeed: Int = 0 // a coordinate in the center of 
the UTM coordinate system - val coordCenterOfUTM = new Coord(500000, 5000000) - val centerSpaceTime = SpaceTime(coordCenterOfUTM, 0) + val coordCenterOfUTM: Coord = new Coord(500000, 5000000) + val centerSpaceTime: SpaceTime = SpaceTime(coordCenterOfUTM, 0) val beamConfig: BeamConfig = BeamConfig(system.settings.config) val geo = new GeoUtilsImpl(beamConfig) @@ -81,12 +81,9 @@ class HierarchicalParkingManagerSpec } { val inquiry = ParkingInquiry.init(centerSpaceTime, "work", triggerId = 10) - val envelope = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) + val (expectedStall: ParkingStall, _) = + ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(randomSeed)) + val response = parkingManager.processParkingInquiry(inquiry) assert(response.triggerId == 10) assert(response.stall.tazId.toString == "emergency") @@ -115,12 +112,8 @@ class HierarchicalParkingManagerSpec ) val inquiry = ParkingInquiry.init(centerSpaceTime, "work", triggerId = 34347) - val envelope = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) + val (expectedStall: ParkingStall, _) = + ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(randomSeed)) val response = parkingManager.processParkingInquiry(inquiry) assert(response.triggerId == 34347) diff --git a/src/test/scala/beam/agentsim/infrastructure/ParallelParkingManagerSpec.scala b/src/test/scala/beam/agentsim/infrastructure/ParallelParkingManagerSpec.scala index 3ea908d3b71..b8db304be80 100644 --- a/src/test/scala/beam/agentsim/infrastructure/ParallelParkingManagerSpec.scala +++ b/src/test/scala/beam/agentsim/infrastructure/ParallelParkingManagerSpec.scala @@ -78,12 +78,9 @@ class ParallelParkingManagerSpec } { val inquiry = 
ParkingInquiry.init(centerSpaceTime, "work", triggerId = 11) - val envelope = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) + val (expectedStall: ParkingStall, _) = + ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(randomSeed)) + val response = parkingManager.processParkingInquiry(inquiry) assert(response.triggerId == 11) assert(response.stall.tazId.toString == "emergency") @@ -108,12 +105,9 @@ class ParallelParkingManagerSpec ) val inquiry = ParkingInquiry.init(centerSpaceTime, "work", triggerId = 173) - val envelope = new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ) + val (expectedStall: ParkingStall, _) = + ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(randomSeed)) + val response = parkingManager.processParkingInquiry(inquiry) assert(response.triggerId == 173) assert(response.stall.tazId.toString == "emergency") diff --git a/src/test/scala/beam/agentsim/infrastructure/ZonalParkingManagerSpec.scala b/src/test/scala/beam/agentsim/infrastructure/ZonalParkingManagerSpec.scala index 54749f3fba7..57e4f0b8079 100644 --- a/src/test/scala/beam/agentsim/infrastructure/ZonalParkingManagerSpec.scala +++ b/src/test/scala/beam/agentsim/infrastructure/ZonalParkingManagerSpec.scala @@ -73,37 +73,31 @@ class ZonalParkingManagerSpec val geo = new GeoUtilsImpl(beamConfig) describe("ZonalParkingManager with no parking") { - val tazTreeMap = ZonalParkingManagerSpec - .mockTazTreeMap( - coords = List((coordCenterOfUTM, 10000)), // one TAZ at agent coordinate - startAtId = 1, - xMin = 167000, - yMin = 0, - xMax = 833000, - yMax = 10000000 - ) - .get - val config = beamConfig - val emptyParkingDescription: Iterator[String] = Iterator.single(ParkingZoneFileUtils.ParkingFileHeader) - 
val zonalParkingManager = ZonalParkingManagerSpec.mockZonalParkingManager( - config, - tazTreeMap, - geo, - emptyParkingDescription, - boundingBox, - randomSeed - ) it("should return a response with an emergency stall") { - val inquiry = ParkingInquiry.init(centerSpaceTime, "work", triggerId = 77239) - val expectedStall: ParkingStall = ParkingStall.lastResortStall( - new Envelope( - inquiry.destinationUtm.loc.getX + 100, - inquiry.destinationUtm.loc.getX - 100, - inquiry.destinationUtm.loc.getY + 100, - inquiry.destinationUtm.loc.getY - 100 - ), - new Random(randomSeed) - ) + + for { + tazTreeMap <- ZonalParkingManagerSpec.mockTazTreeMap( + coords = List((coordCenterOfUTM, 10000)), + startAtId = 1, + xMin = 167000, + yMin = 0, + xMax = 833000, + yMax = 10000000 + ) // one TAZ at agent coordinate + config = beamConfig + emptyParkingDescription: Iterator[String] = Iterator.single(ParkingZoneFileUtils.ParkingFileHeader) + zonalParkingManager = ZonalParkingManagerSpec.mockZonalParkingManager( + config, + tazTreeMap, + geo, + emptyParkingDescription, + boundingBox, + randomSeed + ) + } { + val inquiry = ParkingInquiry.init(centerSpaceTime, "work", triggerId = 77239) + val (expectedStall: ParkingStall, _) = + ParkingStall.lastResortStall(inquiry.destinationUtm.loc, new Random(randomSeed)) val response = zonalParkingManager.processParkingInquiry(inquiry) @@ -478,8 +472,8 @@ class ZonalParkingManagerSpec describe("ZonalParkingManager with multiple parking files loaded") { it("should return the correct stall corresponding with the request (reservedFor, vehicleManagerId)") { - val sharedFleet1 = VehicleManager.createOrGetReservedFor("shared-fleet-1", VehicleManager.TypeEnum.Shared) - val sharedFleet2 = VehicleManager.createOrGetReservedFor("shared-fleet-2", VehicleManager.TypeEnum.Shared) + val sharedFleet1 = VehicleManager.createOrGetReservedFor("shared-fleet-1", Some(VehicleManager.TypeEnum.Shared)) + val sharedFleet2 = 
VehicleManager.createOrGetReservedFor("shared-fleet-2", Some(VehicleManager.TypeEnum.Shared)) val tazMap = taz.TAZTreeMap.fromCsv("test/input/beamville/taz-centers.csv") val stalls = InfrastructureUtils.loadStalls( "test/test-resources/beam/agentsim/infrastructure/taz-parking.csv", diff --git a/src/test/scala/beam/analysis/SimpleRideHailUtilizationTest.scala b/src/test/scala/beam/analysis/SimpleRideHailUtilizationTest.scala index cbfc1d155c0..ec31df3da47 100644 --- a/src/test/scala/beam/analysis/SimpleRideHailUtilizationTest.scala +++ b/src/test/scala/beam/analysis/SimpleRideHailUtilizationTest.scala @@ -22,8 +22,8 @@ class SimpleRideHailUtilizationTest extends AnyFunSuite with Matchers { arrivalTime = 1, mode = BeamMode.CAR, legLength = 1, - linkIds = IndexedSeq.empty, - linkTravelTime = IndexedSeq.empty, + linkIds = Array.empty, + linkTravelTime = Array.empty, startX = 0, startY = 0, endX = 1, @@ -36,10 +36,10 @@ class SimpleRideHailUtilizationTest extends AnyFunSuite with Matchers { None, None, None, - IndexedSeq.empty, - 0.0, + Array.empty, + 0.0f, None, - riders = Vector() + riders = Array.empty ) test("Should ignore non-ridehail vehicles") { diff --git a/src/test/scala/beam/analysis/cartraveltime/StudyAreaTripFilterTest.scala b/src/test/scala/beam/analysis/cartraveltime/StudyAreaTripFilterTest.scala index 700508f107a..499c5174c77 100644 --- a/src/test/scala/beam/analysis/cartraveltime/StudyAreaTripFilterTest.scala +++ b/src/test/scala/beam/analysis/cartraveltime/StudyAreaTripFilterTest.scala @@ -54,34 +54,34 @@ class StudyAreaTripFilterTest extends AnyFunSuite with Matchers { numPass = 1, beamLeg = beamLeg, currentTripMode = None, - primaryFuelConsumed = 1.0, - secondaryFuelConsumed = 0.0, - endLegPrimaryFuelLevel = 1.0, - endLegSecondaryFuelLevel = 0.0, + primaryFuelConsumed = 1.0f, + secondaryFuelConsumed = 0.0f, + endLegPrimaryFuelLevel = 1.0f, + endLegSecondaryFuelLevel = 0.0f, amountPaid = 0, - payloadIds = IndexedSeq.empty, - weight = 0.0, + payloadIds 
= Array.empty, + weight = 0.0f, emissionsProfile = None, - riders = Vector.empty + riders = Array.empty ) test("Should recognize that PTE is inside study area if both start and end are inside of study area") { val pteInsideStudyArea = - defaultPte.copy(startX = -97.763074, startY = 30.235920, endX = -97.687817, endY = 30.303643) + defaultPte.copy(startX = -97.763074f, startY = 30.235920f, endX = -97.687817f, endY = 30.303643f) studyAreaTripFilter.considerPathTraversal(pteInsideStudyArea) shouldBe true } test("Should recognize that PTE is outside of study area if only start is outside of study area") { - val pte = defaultPte.copy(startX = -97.792733, startY = 30.177015, endX = -97.687817, endY = 30.303643) + val pte = defaultPte.copy(startX = -97.792733f, startY = 30.177015f, endX = -97.687817f, endY = 30.303643f) studyAreaTripFilter.considerPathTraversal(pte) shouldBe false } test("Should recognize that PTE is outside of study area if only end is outside of study area") { - val pte = defaultPte.copy(startX = -97.763074, startY = 30.235920, endX = -97.661009, endY = 30.372633) + val pte = defaultPte.copy(startX = -97.763074f, startY = 30.235920f, endX = -97.661009f, endY = 30.372633f) studyAreaTripFilter.considerPathTraversal(pte) shouldBe false } test("Should recognize that PTE is outside of study area if both start and end are outside of study area") { - val pte = defaultPte.copy(startX = -97.792733, startY = 30.177015, endX = -97.661009, endY = 30.372633) + val pte = defaultPte.copy(startX = -97.792733f, startY = 30.177015f, endX = -97.661009f, endY = 30.372633f) studyAreaTripFilter.considerPathTraversal(pte) shouldBe false } } diff --git a/src/test/scala/beam/physsim/cchRoutingAssignment/RoutingFrameworkTravelTimeCalculatorSpec.scala b/src/test/scala/beam/physsim/cchRoutingAssignment/RoutingFrameworkTravelTimeCalculatorSpec.scala index 75178ebb4c3..b7f7accedfc 100644 --- 
a/src/test/scala/beam/physsim/cchRoutingAssignment/RoutingFrameworkTravelTimeCalculatorSpec.scala +++ b/src/test/scala/beam/physsim/cchRoutingAssignment/RoutingFrameworkTravelTimeCalculatorSpec.scala @@ -36,7 +36,7 @@ class RoutingFrameworkTravelTimeCalculatorSpec extends AnyFlatSpec with Matchers "RoutingFrameworkTravelTimeCalculator" must "create hour to travel infos map properly" in { val infos = calculator.generateHour2Events( - Lists.newArrayList(event(3500, IndexedSeq(1, 2, 3, 4, 5), IndexedSeq(30.0, 40.0, 40.0, 1300.0, 2500.0))) + Lists.newArrayList(event(3500, Array(1, 2, 3, 4, 5), Array(30.0f, 40.0f, 40.0f, 1300.0f, 2500.0f))) ) val expectedInfos = Map(1 -> Seq(TravelInfo(IndexedSeq(4, 5))), 0 -> Seq(TravelInfo(IndexedSeq(1, 2, 3)))) @@ -90,8 +90,8 @@ class RoutingFrameworkTravelTimeCalculatorSpec extends AnyFlatSpec with Matchers private def event( departureTime: Int, - linkIds: IndexedSeq[Int], - linkTravelTime: IndexedSeq[Double] + linkIds: Array[Int], + linkTravelTime: Array[Float] ): PathTraversalEvent = { val e = mock(classOf[PathTraversalEvent]) when(e.departureTime).thenReturn(departureTime) diff --git a/src/test/scala/beam/router/skim/urbansim/BackgroundSkimsCreatorTest.scala b/src/test/scala/beam/router/skim/urbansim/BackgroundSkimsCreatorTest.scala index d38e2799d16..06df51aea76 100644 --- a/src/test/scala/beam/router/skim/urbansim/BackgroundSkimsCreatorTest.scala +++ b/src/test/scala/beam/router/skim/urbansim/BackgroundSkimsCreatorTest.scala @@ -144,12 +144,12 @@ class BackgroundSkimsCreatorTest extends AnyFlatSpec with Matchers with BeamHelp pathTypeToSkimsCount(ActivitySimPathType.DRV_HVY_WLK) shouldBe 9 pathTypeToSkimsCount(ActivitySimPathType.WLK_LOC_WLK) shouldBe 86 - pathTypeToSkimsCount(ActivitySimPathType.DRV_LRF_WLK) shouldBe 19 + pathTypeToSkimsCount(ActivitySimPathType.DRV_LRF_WLK) shouldBe 22 pathTypeToSkimsCount(ActivitySimPathType.WLK_LRF_WLK) shouldBe 28 pathTypeToSkimsCount(ActivitySimPathType.WLK_HVY_WLK) shouldBe 24 - 
pathTypeToSkimsCount(ActivitySimPathType.DRV_LOC_WLK) shouldBe 34 + pathTypeToSkimsCount(ActivitySimPathType.DRV_LOC_WLK) shouldBe 31 - skims.keys.size shouldBe (9 + 86 + 19 + 28 + 24 + 34) + skims.keys.size shouldBe (9 + 86 + 22 + 28 + 24 + 31) } "skims creator" should "generate all types of skims" in { diff --git a/src/test/scala/beam/sim/output/IterationsPassengerPerTripTests.scala b/src/test/scala/beam/sim/output/IterationsPassengerPerTripTests.scala index 6b25d8a33e1..83cd0fcb1e1 100644 --- a/src/test/scala/beam/sim/output/IterationsPassengerPerTripTests.scala +++ b/src/test/scala/beam/sim/output/IterationsPassengerPerTripTests.scala @@ -260,20 +260,20 @@ class IterationsPassengerPerTripTests extends AnyWordSpecLike with Matchers with 0.0, null, null, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, None, None, None, - IndexedSeq.empty, - 0.0, + Array.empty, + 0.0f, None, null ) diff --git a/src/test/scala/beam/utils/scenario/PreviousRunPlanMergerTest.scala b/src/test/scala/beam/utils/scenario/PreviousRunPlanMergerTest.scala index 8ff9534bea1..8e1f59caf4c 100644 --- a/src/test/scala/beam/utils/scenario/PreviousRunPlanMergerTest.scala +++ b/src/test/scala/beam/utils/scenario/PreviousRunPlanMergerTest.scala @@ -370,6 +370,8 @@ class PreviousRunPlanMergerTest extends AnyWordSpecLike with Matchers { None, None, None, + None, + None, Seq(), None ) @@ -404,6 +406,8 @@ class PreviousRunPlanMergerTest extends AnyWordSpecLike with Matchers { None, None, None, + None, + None, Seq.empty, None ) @@ -430,6 +434,8 @@ class PreviousRunPlanMergerTest extends AnyWordSpecLike with Matchers { None, None, None, + None, + None, Seq.empty, None ) diff --git a/src/test/scala/beam/utils/scenario/urbansim/HOVModeTransformerTest.scala b/src/test/scala/beam/utils/scenario/urbansim/HOVModeTransformerTest.scala index 86c0cda4554..7a8d691b28b 100644 --- 
a/src/test/scala/beam/utils/scenario/urbansim/HOVModeTransformerTest.scala +++ b/src/test/scala/beam/utils/scenario/urbansim/HOVModeTransformerTest.scala @@ -517,6 +517,8 @@ object HOVModeTransformerTest { legMode = Some(mode), legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, @@ -542,6 +544,8 @@ object HOVModeTransformerTest { legMode = None, legDepartureTime = None, legTravelTime = None, + legExpectedTravelTime = None, + legExpectedCost = None, legRouteType = None, legRouteStartLink = None, legRouteEndLink = None, diff --git a/test/input/beamville/beam-freight.conf b/test/input/beamville/beam-freight.conf index 9e41c1c742c..472bf9a636a 100644 --- a/test/input/beamville/beam-freight.conf +++ b/test/input/beamville/beam-freight.conf @@ -10,8 +10,14 @@ beam.agentsim.agents.freight { plansFilePath = ${beam.inputDirectory}"/freight/payload-plans.csv" toursFilePath = ${beam.inputDirectory}"/freight/freight-tours.csv" carriersFilePath = ${beam.inputDirectory}"/freight/freight-carriers.csv" - carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-depots.csv" vehicleTypesFilePath = ${beam.inputDirectory}"/freight/freight-vehicleTypes.csv" + + carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking.csv" + #carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking-low.csv" + #carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking-medium.csv" + #carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking-high.csv" + #carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking-unlimited.csv" + reader = "Generic" replanning { disableAfterIteration = 1 diff --git a/test/input/beamville/beam.conf b/test/input/beamville/beam.conf index 6c5b6dc919c..6e7c8a2bf9c 100755 --- a/test/input/beamville/beam.conf +++ b/test/input/beamville/beam.conf @@ -206,7 +206,7 @@ 
beam.outputs.writeEventsInterval = ${beam.outputs.defaultWriteInterval} beam.physsim.writeEventsInterval = ${beam.outputs.defaultWriteInterval} beam.physsim.writePlansInterval = ${beam.outputs.defaultWriteInterval} beam.outputs.writeAnalysis = false -beam.physsim.linkStatsWriteInterval = 0 +beam.physsim.linkStatsWriteInterval = 1 # The remaining params customize how events are written to output files beam.outputs.events.fileOutputFormats = "csv,xml" # valid options: xml(.gz) , csv(.gz), none - DEFAULT: csv.gz diff --git a/test/input/beamville/freight/freight-depots.csv b/test/input/beamville/freight/freight-depots.csv index 96bf350a58d..113f26a5954 100644 --- a/test/input/beamville/freight/freight-depots.csv +++ b/test/input/beamville/freight/freight-depots.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53f306cb96c186094b9f931ff19749b19a3808c6ad7a0736328d7a6a74600f27 -size 8370 +oid sha256:2e0c5909616aecfcc5f28bb33e1ce0fe126fb63d4c04474dff4444a0e277c1c0 +size 8898 diff --git a/test/input/beamville/freight/freight-parking-high.csv b/test/input/beamville/freight/freight-parking-high.csv new file mode 100644 index 00000000000..7d90c6a81ea --- /dev/null +++ b/test/input/beamville/freight/freight-parking-high.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4b3aea36d35d0d2f0f35761a119c09ab955236fbc916504fecdb57fe108812 +size 740 diff --git a/test/input/beamville/freight/freight-parking-low.csv b/test/input/beamville/freight/freight-parking-low.csv new file mode 100644 index 00000000000..36a537289a7 --- /dev/null +++ b/test/input/beamville/freight/freight-parking-low.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bf0551ec5455d644ec9b49a7c5179e9f2bd3a6a1cd44b110a132de8a982f96 +size 724 diff --git a/test/input/beamville/freight/freight-parking-medium.csv b/test/input/beamville/freight/freight-parking-medium.csv new file mode 100644 index 00000000000..fd12471c47f --- /dev/null +++ 
b/test/input/beamville/freight/freight-parking-medium.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574b951be7e049f0d3470f8d6451ff6aee8f1c0d1a06f13b185056250bced290 +size 732 diff --git a/test/input/beamville/freight/freight-parking-unlimited.csv b/test/input/beamville/freight/freight-parking-unlimited.csv new file mode 100644 index 00000000000..ea4727940de --- /dev/null +++ b/test/input/beamville/freight/freight-parking-unlimited.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552215eaf2a568994273dd87676e37ea05066d276d9f671d3bd5135e0c4b1a26 +size 796 diff --git a/test/input/beamville/freight/freight-parking.csv b/test/input/beamville/freight/freight-parking.csv new file mode 100644 index 00000000000..113f26a5954 --- /dev/null +++ b/test/input/beamville/freight/freight-parking.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0c5909616aecfcc5f28bb33e1ce0fe126fb63d4c04474dff4444a0e277c1c0 +size 8898 diff --git a/test/input/sf-light/freight/freight-depots.csv b/test/input/sf-light/freight/freight-depots.csv new file mode 100644 index 00000000000..5dca9ff93ba --- /dev/null +++ b/test/input/sf-light/freight/freight-depots.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43852643264ef040e855d12101e533cf22b888593a0d2a28d1d26f17cd56b3e5 +size 504654 diff --git a/test/input/sf-light/freight/freight-depots.csv.gz b/test/input/sf-light/freight/freight-depots.csv.gz deleted file mode 100644 index 814699157f6..00000000000 --- a/test/input/sf-light/freight/freight-depots.csv.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fcd7271e8bae1d84c0c591ef9aa4dbba403284d47e0e11aa6eca910fd7fc980 -size 30826 diff --git a/test/input/sf-light/freight/freight-parking.csv b/test/input/sf-light/freight/freight-parking.csv new file mode 100644 index 00000000000..1b6ff243706 --- /dev/null +++ b/test/input/sf-light/freight/freight-parking.csv @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0deb1ce701904119a2bd003d0f81ddc96c543c71dd01f68e032a72c078fe307 +size 539219 diff --git a/test/input/sf-light/freight/freight-vehicleTypes.csv b/test/input/sf-light/freight/freight-vehicleTypes.csv new file mode 100644 index 00000000000..14bed36f78e --- /dev/null +++ b/test/input/sf-light/freight/freight-vehicleTypes.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911560012e3ac4c86804409980a37a63cf35e1fbb816cb55131eb8195ec9d906 +size 702 diff --git a/test/input/sf-light/sf-light-freight.conf b/test/input/sf-light/sf-light-freight.conf index b77c734a31c..1aedbc47417 100644 --- a/test/input/sf-light/sf-light-freight.conf +++ b/test/input/sf-light/sf-light-freight.conf @@ -3,14 +3,18 @@ include "sf-light.conf" beam.agentsim.simulationName = "freight" beam.agentsim.firstIteration = 0 -beam.agentsim.lastIteration = 1 +beam.agentsim.lastIteration = 0 beam.agentsim.agents.freight { enabled = true plansFilePath = ${beam.inputDirectory}"/freight/payload-plans.csv" toursFilePath = ${beam.inputDirectory}"/freight/freight-tours.csv" carriersFilePath = ${beam.inputDirectory}"/freight/freight-carriers.csv" - carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-depots.csv.gz" + vehicleTypesFilePath = ${beam.inputDirectory}"/freight/freight-vehicleTypes.csv" + + carrierParkingFilePath = ${beam.inputDirectory}"/freight/freight-parking.csv" + + reader = "Generic" replanning { disableAfterIteration = 1 departureTime = 28800