From 976dafc351de9d70da9826fc57215ff3711ca81a Mon Sep 17 00:00:00 2001 From: Nate-Wessel Date: Tue, 23 Apr 2024 17:51:51 +0000 Subject: [PATCH] compute a bootstrap sample dist --- backend/app/get_travel_time.py | 101 ++++++++++----------------------- 1 file changed, 30 insertions(+), 71 deletions(-) diff --git a/backend/app/get_travel_time.py b/backend/app/get_travel_time.py index 5f893db..9b273b2 100644 --- a/backend/app/get_travel_time.py +++ b/backend/app/get_travel_time.py @@ -1,5 +1,7 @@ from app.db import getConnection from app.get_links import get_links +import numpy +import random def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_date, include_holidays, dow_list): @@ -13,72 +15,24 @@ def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_ SELECT 1 FROM ref.holiday WHERE ta_path.dt = holiday.dt )''' - - agg_tt_query = f''' - -- Aggregate segments to corridor on a daily, hourly basis - WITH corridor_hourly_daily_agg AS ( - SELECT - cn.dt, - cn.hr, - SUM(cn.unadjusted_tt) AS corr_hourly_daily_tt - FROM congestion.network_segments_daily AS cn - WHERE - cn.segment_id::integer = ANY(%(seglist)s) - AND cn.hr <@ %(time_range)s::numrange - AND date_part('ISODOW', cn.dt)::integer = ANY(%(dow_list)s) - AND cn.dt <@ %(date_range)s::daterange - {tt_holiday_clause} - GROUP BY - cn.dt, - cn.hr - -- where corridor has at least 80pct of links with data - HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8 - ), - - -- Average the hours selected into daily period level data - corridor_period_daily_avg_tt AS ( - SELECT - dt, - AVG(corr_hourly_daily_tt) AS avg_corr_period_daily_tt - FROM corridor_hourly_daily_agg - GROUP BY dt - ) - - -- Average all the days with data to get period level data for each date range - SELECT - ROUND(AVG(avg_corr_period_daily_tt) / 60, 2) AS average_tt_min - FROM corridor_period_daily_avg_tt - ''' - - sample_size_query = f""" - SELECT SUM( ((length / 1000.0) / mean) * sample_size ) AS probe_hours - FROM here.ta_path - WHERE - link_dir = ANY(%(link_dir_list)s) - AND dt <@ %(date_range)s::daterange - AND EXTRACT(ISODOW FROM dt)::integer = ANY(%(dow_list)s) - AND EXTRACT(HOUR FROM tod)::numeric <@ %(time_range)s::numrange - {sample_holiday_clause} - """ - - sample_hour_query = f""" + hourly_tt_query = f''' SELECT - cn.dt, - cn.hr, - SUM(cn.unadjusted_tt) AS corr_hourly_daily_tt + dt, + hr, + SUM(cn.unadjusted_tt) * %(length_m)s::real / SUM(cn.length_w_data) AS tt FROM congestion.network_segments_daily AS cn - WHERE - cn.segment_id::integer IN %(seglist)s + WHERE + cn.segment_id::integer = ANY(%(seglist)s) AND cn.hr <@ %(time_range)s::numrange - AND date_part('ISODOW', cn.dt)::integer IN %(dow_list)s - AND cn.dt <@ %(date_range)s::daterange + AND date_part('ISODOW', cn.dt)::integer = ANY(%(dow_list)s) + AND cn.dt <@ %(date_range)s::daterange {tt_holiday_clause} GROUP BY cn.dt, cn.hr -- where corridor has at least 80pct of links with data - HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8 - """ + HAVING SUM(cn.length_w_data) >= %(length_m)s::numeric * 0.8; + ''' links = get_links(start_node, end_node) @@ -97,20 +51,25 @@ def get_travel_time(start_node, end_node, start_time, end_time, start_date, end_ connection = getConnection() with connection: with connection.cursor() as cursor: - cursor.execute(agg_tt_query, query_params) - # travel_time may be null if there's insufficient data - travel_time, = cursor.fetchone() - cursor.execute(sample_size_query, query_params) - probe_hours, = cursor.fetchone() + # get the hourly travel times + cursor.execute(hourly_tt_query, query_params) + tt_hourly = [ tt for (dt, hr, tt) in cursor.fetchall() ] + connection.close() - cursor.execute(sample_hour_query, query_params) - hour_bins_used = cursor.rowcount + # bootstrap for synthetic sample distribution + sample_distribution = [] + for i in range(0,100): + bootstrap_sample = random.choices( + tt_hourly, + k = len(tt_hourly) + ) + sample_distribution.append( numpy.mean(bootstrap_sample) ) - connection.close() return { - 'bins': hour_bins_used, - 'travel_time': None if travel_time is None else float(travel_time), - 'links': links, - 'estimated_vehicle_count': None if travel_time is None else float((probe_hours * 60) / travel_time), - 'query_params': query_params + 'average_travel_time': numpy.mean(tt_hourly), + 'upper': numpy.percentile(sample_distribution,95), + 'lower': numpy.percentile(sample_distribution,5), + 'hourly_travel_times': tt_hourly + #'links': links, + #'query_params': query_params } \ No newline at end of file