Skip to content

Commit e71211b

Browse files
committed
note geos we created in separate col
1 parent 96da3bb commit e71211b

File tree

1 file changed

+155
-58
lines changed

1 file changed

+155
-58
lines changed

scripts/signal_spreadsheet_updater.R

Lines changed: 155 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ signal_sheet <- suppressMessages(read_csv("delphi-eng-covidcast-data-sources-sig
126126
# Fields we want to add.
127127
new_fields <- c(
128128
"Geographic Scope",
129+
"Delphi-Aggregated Geography",
129130
"Temporal Scope Start",
130131
"Temporal Scope End",
131132
"Reporting Cadence",
@@ -344,74 +345,169 @@ geo_scope <- c(
344345
source_updated[, col] <- geo_scope[source_updated$data_source]
345346

346347

348+
349+
347350
col <- "Available Geography"
348-
# List all available geo-levels. If a geo-level was created by Delphi
349-
# aggregation (as opposed to being ingested directly from the data source),
350-
# indicate this as per this example: county, state (by Delphi), National
351-
# (by Delphi).
352-
353-
# Tool: Create lists of geos for each data source-signal combo based on what is reported in metadata (does not include quidel).
354-
metadata_factorgeo <- metadata
355-
metadata_factorgeo$geo_type <- factor(metadata_factorgeo$geo_type, levels = c("county", "hrr", "msa", "dma", "state", "hhs", "nation"))
356-
auto_geo_list_by_signal <- arrange(
357-
metadata_factorgeo,
358-
geo_type
359-
) %>%
360-
group_by(
361-
data_source,
362-
signal
363-
) %>%
364-
summarize(
365-
geos_list = paste(geo_type, collapse = ", "),
366-
.groups = "keep"
367-
) %>%
368-
ungroup()
351+
# List all available geo-levels, e.g. county,state,nation
352+
353+
# # Tool: Create lists of geos for each data source-signal combo based on what is
354+
# # reported in metadata (does not include quidel).
355+
# metadata_factorgeo <- metadata
356+
# metadata_factorgeo$geo_type <- factor(metadata_factorgeo$geo_type, levels = c("county", "hrr", "msa", "dma", "state", "hhs", "nation"))
357+
# auto_geo_list_by_signal <- arrange(
358+
# metadata_factorgeo,
359+
# geo_type
360+
# ) %>%
361+
# group_by(
362+
# data_source,
363+
# signal
364+
# ) %>%
365+
# summarize(
366+
# geos_list = paste(geo_type, collapse = ", "),
367+
# .groups = "keep"
368+
# ) %>%
369+
# ungroup()
370+
371+
# # Tool: Are there any data sources where geos_list is different for different signals?
372+
# different_geos_by_signal <- count(auto_geo_list_by_signal, data_source, geos_list, name = "n_signals")
373+
# # different_geos_by_signal
374+
# # which(duplicated(select(different_geos_by_signal, data_source)))
375+
376+
# # Keep most common geos_list for each data source.
377+
# most_common_geos_list <- group_by(different_geos_by_signal, data_source) %>%
378+
# slice_max(n_signals, with_ties = FALSE)
379+
# # most_common_geos_list
380+
# leftover_datasource_geos <- anti_join(different_geos_by_signal, most_common_geos_list)
381+
# # leftover_datasource_geos
382+
# leftover_signal_geos <- semi_join(auto_geo_list_by_signal, leftover_datasource_geos)
383+
# # leftover_signal_geos
384+
385+
# These values are applied first. They are the default (most common) geos for each data source.
386+
avail_geos <- c(
387+
"chng" = glue("county,hrr,msa,state,hhs,nation"),
388+
"covid-act-now" = glue("county,hrr,msa,state,hhs,nation"),
389+
"doctor-visits" = glue("county,hrr,msa,state,hhs,nation"),
390+
"dsew-cpr" = glue("county,msa,state,hhs,nation"),
391+
"fb-survey" = glue("county,hrr,msa,state,nation"),
392+
"ght" = glue("hrr,msa,dma,state"),
393+
"google-survey" = glue("county,hrr,msa,state"),
394+
"google-symptoms" = glue("county,hrr,msa,state,hhs,nation"),
395+
"hhs" = glue("state,hhs,nation"),
396+
"hospital-admissions" = glue("county,hrr,msa,state,hhs,nation"),
397+
"indicator-combination" = glue("county,hrr,msa,state,hhs,nation"),
398+
"jhu-csse" = glue("county,hrr,msa,state,hhs,nation"),
399+
"nchs-mortality" = glue("state,nation"),
400+
# Quidel non-flu signals
401+
"quidel" = glue("county,hrr,msa,state,hhs,nation"),
402+
"safegraph" = glue("county,hrr,msa,state,hhs,nation"),
403+
"usa-facts" = glue("county,hrr,msa,state,hhs,nation"),
404+
"youtube-survey" = "state"
405+
)
406+
407+
# These are signal-specific geo lists. These are less common and are applied as a patch.
408+
dsew_geos <- glue("state,hhs,nation")
409+
fb_geos1 <- glue("county,state,nation")
410+
fb_geos2 <- glue("county,msa,state,nation")
411+
hosp_geos <- glue("county,hrr,msa,state")
412+
combo_geos <- glue("county,msa,state")
413+
quidel_geos <- glue("msa,state")
414+
leftover_signal_geos_manual <- tibble::tribble(
415+
~data_source, ~signal, ~geos_list,
416+
"chng", "7dav_inpatient_covid", "state",
417+
"chng", "7dav_outpatient_covid", "state",
418+
419+
"dsew-cpr", "booster_doses_admin_7dav", dsew_geos,
420+
"dsew-cpr", "doses_admin_7dav", dsew_geos,
421+
"dsew-cpr", "people_booster_doses", dsew_geos,
422+
423+
"fb-survey", "smoothed_vaccine_barrier_appointment_location_tried", fb_geos1,
424+
"fb-survey", "smoothed_vaccine_barrier_other_tried", fb_geos1,
425+
"fb-survey", "smoothed_wvaccine_barrier_appointment_location_tried", fb_geos1,
426+
"fb-survey", "smoothed_wvaccine_barrier_other_tried", fb_geos1,
427+
428+
"fb-survey", "smoothed_vaccine_barrier_appointment_time_tried", fb_geos2,
429+
"fb-survey", "smoothed_vaccine_barrier_childcare_tried", fb_geos2,
430+
"fb-survey", "smoothed_vaccine_barrier_document_tried", fb_geos2,
431+
"fb-survey", "smoothed_vaccine_barrier_eligible_tried", fb_geos2,
432+
"fb-survey", "smoothed_vaccine_barrier_language_tried", fb_geos2,
433+
"fb-survey", "smoothed_vaccine_barrier_no_appointments_tried", fb_geos2,
434+
"fb-survey", "smoothed_vaccine_barrier_none_tried", fb_geos2,
435+
"fb-survey", "smoothed_vaccine_barrier_technical_difficulties_tried", fb_geos2,
436+
"fb-survey", "smoothed_vaccine_barrier_technology_access_tried", fb_geos2,
437+
"fb-survey", "smoothed_vaccine_barrier_time_tried", fb_geos2,
438+
"fb-survey", "smoothed_vaccine_barrier_travel_tried", fb_geos2,
439+
"fb-survey", "smoothed_vaccine_barrier_type_tried", fb_geos2,
440+
"fb-survey", "smoothed_wvaccine_barrier_appointment_time_tried", fb_geos2,
441+
"fb-survey", "smoothed_wvaccine_barrier_childcare_tried", fb_geos2,
442+
"fb-survey", "smoothed_wvaccine_barrier_document_tried", fb_geos2,
443+
"fb-survey", "smoothed_wvaccine_barrier_eligible_tried", fb_geos2,
444+
"fb-survey", "smoothed_wvaccine_barrier_language_tried", fb_geos2,
445+
"fb-survey", "smoothed_wvaccine_barrier_no_appointments_tried", fb_geos2,
446+
"fb-survey", "smoothed_wvaccine_barrier_none_tried", fb_geos2,
447+
"fb-survey", "smoothed_wvaccine_barrier_technical_difficulties_tried", fb_geos2,
448+
"fb-survey", "smoothed_wvaccine_barrier_technology_access_tried", fb_geos2,
449+
"fb-survey", "smoothed_wvaccine_barrier_time_tried", fb_geos2,
450+
"fb-survey", "smoothed_wvaccine_barrier_travel_tried", fb_geos2,
451+
"fb-survey", "smoothed_wvaccine_barrier_type_tried", fb_geos2,
452+
453+
"hospital-admissions", "smoothed_adj_covid19", hosp_geos,
454+
"hospital-admissions", "smoothed_covid19", hosp_geos,
455+
456+
"indicator-combination", "nmf_day_doc_fbc_fbs_ght", combo_geos,
457+
"indicator-combination", "nmf_day_doc_fbs_ght", combo_geos,
458+
459+
# Quidel flu signals
460+
"quidel", "raw_pct_negative", quidel_geos,
461+
"quidel", "smoothed_pct_negative", quidel_geos,
462+
"quidel", "raw_tests_per_device", quidel_geos,
463+
"quidel", "smoothed_tests_per_device", quidel_geos
464+
)
369465

370-
# Tool: Are there any data sources where geos_list is different for different signals?
371-
different_geos_by_signal <- count(auto_geo_list_by_signal, data_source, geos_list, name = "n_signals")
372-
# different_geos_by_signal
373-
# which(duplicated(select(different_geos_by_signal, data_source)))
466+
source_updated[, col] <- coalesce(avail_geos[source_updated$data_source], source_updated[[col]])
467+
468+
source_updated <- left_join(
469+
source_updated, leftover_signal_geos_manual,
470+
by = c("Signal" = "signal", "data_source")
471+
) %>%
472+
mutate(`Available Geography` = coalesce(geos_list, `Available Geography`)) %>%
473+
select(-geos_list)
374474

375-
# Keep most common geos_list for each data source.
376-
most_common_geos_list <- group_by(different_geos_by_signal, data_source) %>%
377-
slice_max(n_signals, with_ties = FALSE)
378-
# most_common_geos_list
379-
leftover_datasource_geos <- anti_join(different_geos_by_signal, most_common_geos_list)
380-
# leftover_datasource_geos
381-
leftover_signal_geos <- semi_join(auto_geo_list_by_signal, leftover_datasource_geos)
382-
# leftover_signal_geos
383475

384-
delphi_agg_text <- " (by Delphi)"
476+
col <- "Delphi-Aggregated Geography"
477+
# List available geo-levels that were created by Delphi (as opposed to being
478+
# ingested directly from the data source), e.g. if available at the county,
479+
# state, and nation levels but state and nation were aggregated by us from
480+
# provided county data: state,nation
385481

386482
# These values are applied first. They are the default (most common) geos for each data source.
387483
avail_geos <- c(
388-
"chng" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
389-
"covid-act-now" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
390-
"doctor-visits" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
391-
"dsew-cpr" = glue("county, msa, state, hhs, nation{delphi_agg_text}"),
392-
"fb-survey" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text}"),
393-
"ght" = glue("hrr{delphi_agg_text}, msa{delphi_agg_text}, dma, state"),
394-
"google-survey" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}"),
395-
"google-symptoms" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
396-
"hhs" = glue("state, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
397-
"hospital-admissions" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
398-
"indicator-combination" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
399-
"jhu-csse" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
400-
"nchs-mortality" = glue("state, nation"),
484+
"chng" = glue("hrr,msa,state,hhs,nation"),
485+
"covid-act-now" = glue("hrr,msa,state,hhs,nation"),
486+
"doctor-visits" = glue("hrr,msa,state,hhs,nation"),
487+
"dsew-cpr" = glue("nation"),
488+
"fb-survey" = glue("county,hrr,msa,state,nation"),
489+
"ght" = glue("hrr,msa"),
490+
"google-survey" = glue("county,hrr,msa,state"),
491+
"google-symptoms" = glue("hrr,msa,hhs,nation"),
492+
"hhs" = glue("hhs,nation"),
493+
"hospital-admissions" = glue("county,hrr,msa,state,hhs,nation"),
494+
"indicator-combination" = glue("county,hrr,msa,state,hhs,nation"),
495+
"jhu-csse" = glue("hrr,msa,state,hhs,nation"),
496+
"nchs-mortality" = NA_character_,
401497
# Quidel non-flu signals
402-
"quidel" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
403-
"safegraph" = glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
404-
"usa-facts" = glue("county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text}"),
405-
"youtube-survey" = "state{delphi_agg_text}"
498+
"quidel" = glue("county,hrr,msa,state,hhs,nation"),
499+
"safegraph" = glue("county,hrr,msa,state,hhs,nation"),
500+
"usa-facts" = glue("hrr,msa,state,hhs,nation"),
501+
"youtube-survey" = "state"
406502
)
407503

408504
# These are signal-specific geo lists. These are less common and are applied as a patch.
409-
dsew_geos <- glue("state, hhs, nation{delphi_agg_text}")
410-
fb_geos1 <- glue("county{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text}")
411-
fb_geos2 <- glue("county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text}")
412-
hosp_geos <- glue("county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}")
413-
combo_geos <- glue("county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}")
414-
quidel_geos <- glue("msa{delphi_agg_text}, state{delphi_agg_text}")
505+
dsew_geos <- glue("nation")
506+
fb_geos1 <- glue("county,state,nation")
507+
fb_geos2 <- glue("county,msa,state,nation")
508+
hosp_geos <- glue("county,hrr,msa,state")
509+
combo_geos <- glue("county,msa,state")
510+
quidel_geos <- glue("msa,state")
415511
leftover_signal_geos_manual <- tibble::tribble(
416512
~data_source, ~signal, ~geos_list,
417513
"chng", "7dav_inpatient_covid", "state",
@@ -470,10 +566,11 @@ source_updated <- left_join(
470566
source_updated, leftover_signal_geos_manual,
471567
by = c("Signal" = "signal", "data_source")
472568
) %>%
473-
mutate(`Available Geography` = coalesce(geos_list, `Available Geography`)) %>%
569+
mutate(`Delphi-Aggregated Geography` = coalesce(geos_list, `Delphi-Aggregated Geography`)) %>%
474570
select(-geos_list)
475571

476572

573+
477574
# Temporal Scope Start
478575
# Above. YYYY-MM-DD, with epiweeks as YYYY-WW. Formatted as a string
479576

0 commit comments

Comments
 (0)