compute a bootstrap sample dist
Nate-Wessel committed Apr 23, 2024
1 parent 85db16b commit 976dafc
Showing 1 changed file with 30 additions and 71 deletions.
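The gist of the change, restated outside the diff: instead of returning a single pre-aggregated travel time, the endpoint now pulls one corridor travel time per (date, hour) bin and bootstraps a sampling distribution of the mean by resampling those hourly values with replacement 100 times, reporting the 5th and 95th percentiles of the resampled means as a rough confidence band. The sketch below is not part of the commit; the function name, the default of 100 resamples, and the example inputs are illustrative, though the resampling and percentile choices mirror the diff.

# Standalone sketch, not the committed code: bootstrap a sampling
# distribution of the mean hourly travel time, as the new code does.
import random
import numpy

def bootstrap_travel_time_summary(tt_hourly, n_resamples=100):
    # tt_hourly: one corridor travel time per (date, hour) bin, in seconds
    sample_distribution = []
    for _ in range(n_resamples):
        # resample with replacement, keeping the original sample size
        resample = random.choices(tt_hourly, k=len(tt_hourly))
        sample_distribution.append(numpy.mean(resample))
    return {
        'average_travel_time': float(numpy.mean(tt_hourly)),
        'lower': float(numpy.percentile(sample_distribution, 5)),
        'upper': float(numpy.percentile(sample_distribution, 95)),
    }

# example usage with made-up hourly travel times (seconds)
print(bootstrap_travel_time_summary([310.2, 295.7, 330.1, 301.4, 288.9]))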
101 changes: 30 additions & 71 deletions backend/app/get_travel_time.py
@@ -1,5 +1,7 @@
from app.db import getConnection
from app.get_links import get_links
+ import numpy
+ import random

def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_date, include_holidays, dow_list):

@@ -13,72 +15,24 @@ def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_
SELECT 1 FROM ref.holiday WHERE ta_path.dt = holiday.dt
)'''


- agg_tt_query = f'''
- -- Aggregate segments to corridor on a daily, hourly basis
- WITH corridor_hourly_daily_agg AS (
- SELECT
- cn.dt,
- cn.hr,
- SUM(cn.unadjusted_tt) AS corr_hourly_daily_tt
- FROM congestion.network_segments_daily AS cn
- WHERE
- cn.segment_id::integer = ANY(%(seglist)s)
- AND cn.hr <@ %(time_range)s::numrange
- AND date_part('ISODOW', cn.dt)::integer = ANY(%(dow_list)s)
- AND cn.dt <@ %(date_range)s::daterange
- {tt_holiday_clause}
- GROUP BY
- cn.dt,
- cn.hr
- -- where corridor has at least 80pct of links with data
- HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8
- ),
- -- Average the hours selected into daily period level data
- corridor_period_daily_avg_tt AS (
- SELECT
- dt,
- AVG(corr_hourly_daily_tt) AS avg_corr_period_daily_tt
- FROM corridor_hourly_daily_agg
- GROUP BY dt
- )
- -- Average all the days with data to get period level data for each date range
- SELECT
- ROUND(AVG(avg_corr_period_daily_tt) / 60, 2) AS average_tt_min
- FROM corridor_period_daily_avg_tt
- '''

sample_size_query = f"""
SELECT SUM( ((length / 1000.0) / mean) * sample_size ) AS probe_hours
FROM here.ta_path
WHERE
link_dir = ANY(%(link_dir_list)s)
AND dt <@ %(date_range)s::daterange
AND EXTRACT(ISODOW FROM dt)::integer = ANY(%(dow_list)s)
AND EXTRACT(HOUR FROM tod)::numeric <@ %(time_range)s::numrange
{sample_holiday_clause}
"""

sample_hour_query = f"""
hourly_tt_query = f'''
SELECT
cn.dt,
cn.hr,
SUM(cn.unadjusted_tt) AS corr_hourly_daily_tt
dt,
hr,
SUM(cn.unadjusted_tt) * %(length_m)s::real / SUM(cn.length_w_data) AS tt
FROM congestion.network_segments_daily AS cn
WHERE
cn.segment_id::integer IN %(seglist)s
WHERE
cn.segment_id::integer = ANY(%(seglist)s)
AND cn.hr <@ %(time_range)s::numrange
AND date_part('ISODOW', cn.dt)::integer IN %(dow_list)s
AND cn.dt <@ %(date_range)s::daterange
AND date_part('ISODOW', cn.dt)::integer = ANY(%(dow_list)s)
AND cn.dt <@ %(date_range)s::daterange
{tt_holiday_clause}
GROUP BY
cn.dt,
cn.hr
-- where corridor has at least 80pct of links with data
HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8
"""
HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8;
'''

links = get_links(start_node, end_node)

@@ -97,20 +51,25 @@ def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_
connection = getConnection()
with connection:
with connection.cursor() as cursor:
- cursor.execute(agg_tt_query, query_params)
- # travel_time may be null if there's insufficient data
- travel_time, = cursor.fetchone()
- cursor.execute(sample_size_query, query_params)
- probe_hours, = cursor.fetchone()
+ # get the hourly travel times
+ cursor.execute(hourly_tt_query, query_params)
+ tt_hourly = [ tt for (dt, hr, tt) in cursor.fetchall() ]
+ connection.close()

- cursor.execute(sample_hour_query, query_params)
- hour_bins_used = cursor.rowcount
+ # bootstrap for synthetic sample distribution
+ sample_distribution = []
+ for i in range(0,100):
+ bootstrap_sample = random.choices(
+ tt_hourly,
+ k = len(tt_hourly)
+ )
+ sample_distribution.append( numpy.mean(bootstrap_sample) )

- connection.close()
return {
- 'bins': hour_bins_used,
- 'travel_time': None if travel_time is None else float(travel_time),
- 'links': links,
- 'estimated_vehicle_count': None if travel_time is None else float((probe_hours * 60) / travel_time),
- 'query_params': query_params
+ 'average_travel_time': numpy.mean(tt_hourly),
+ 'upper': numpy.percentile(sample_distribution,95),
+ 'lower': numpy.percentile(sample_distribution,5),
+ 'hourly_travel_times': tt_hourly
+ #'links': links,
+ #'query_params': query_params
}
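
For reference, the per-hour figure that hourly_tt_query feeds into the bootstrap is a length-weighted extrapolation: the summed travel time over segments that have data is scaled up by the ratio of the full corridor length to the length covered, and hours with less than 80% coverage are dropped by the HAVING clause. A small illustration with made-up numbers follows; treating length_w_data as metres of corridor covered by data and unadjusted_tt as seconds is an assumption based on the column names.

# Illustration only: the scaling hourly_tt_query applies per (date, hour),
# redone in Python with invented numbers.
corridor_length_m = 1000.0    # %(length_m)s, the full corridor length
length_with_data_m = 850.0    # SUM(cn.length_w_data) for this hour (assumed metres)
observed_tt_s = 100.0         # SUM(cn.unadjusted_tt) for this hour (assumed seconds)

# HAVING clause: keep the hour only if at least 80% of the corridor has data
if length_with_data_m >= corridor_length_m * 0.8:
    # scale the observed travel time up to the full corridor length
    tt = observed_tt_s * corridor_length_m / length_with_data_m
    print(round(tt, 1))  # 117.6: observed time extrapolated to the whole corridor

Presumably the point of scaling rather than simply summing is that hours with partial segment coverage would otherwise look faster than they are; the 80% floor then limits how much extrapolation is allowed.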
