@@ -126,6 +126,7 @@ signal_sheet <- suppressMessages(read_csv("delphi-eng-covidcast-data-sources-sig
126
126
# Fields we want to add.
127
127
new_fields <- c(
128
128
" Geographic Scope" ,
129
+ " Delphi-Aggregated Geography" ,
129
130
" Temporal Scope Start" ,
130
131
" Temporal Scope End" ,
131
132
" Reporting Cadence" ,
@@ -344,74 +345,169 @@ geo_scope <- c(
344
345
source_updated [, col ] <- geo_scope [source_updated $ data_source ]
345
346
346
347
348
+
349
+
347
350
col <- " Available Geography"
348
- # List all available geo-levels. If a geo-level was created by Delphi
349
- # aggregation (as opposed to being ingested directly from the data source),
350
- # indicate this as per this example: county, state (by Delphi), National
351
- # (by Delphi).
352
-
353
- # Tool: Create lists of geos for each data source-signal combo based on what is reported in metadata (does not include quidel, at least with).
354
- metadata_factorgeo <- metadata
355
- metadata_factorgeo $ geo_type <- factor (metadata_factorgeo $ geo_type , levels = c(" county" , " hrr" , " msa" , " dma" , " state" , " hhs" , " nation" ))
356
- auto_geo_list_by_signal <- arrange(
357
- metadata_factorgeo ,
358
- geo_type
359
- ) %> %
360
- group_by(
361
- data_source ,
362
- signal
363
- ) %> %
364
- summarize(
365
- geos_list = paste(geo_type , collapse = " , " ),
366
- .groups = " keep"
367
- ) %> %
368
- ungroup()
351
+ # List all available geo-levels, e.g. county,state,nation
352
+
353
+ # # Tool: Create lists of geos for each data source-signal combo based on what is
354
+ # # reported in metadata (does not include quidel).
355
+ # metadata_factorgeo <- metadata
356
+ # metadata_factorgeo$geo_type <- factor(metadata_factorgeo$geo_type, levels = c("county", "hrr", "msa", "dma", "state", "hhs", "nation"))
357
+ # auto_geo_list_by_signal <- arrange(
358
+ # metadata_factorgeo,
359
+ # geo_type
360
+ # ) %>%
361
+ # group_by(
362
+ # data_source,
363
+ # signal
364
+ # ) %>%
365
+ # summarize(
366
+ # geos_list = paste(geo_type, collapse = ", "),
367
+ # .groups = "keep"
368
+ # ) %>%
369
+ # ungroup()
370
+
371
+ # # Tool: Are there any data sources where geos_list differs between signals?
372
+ # different_geos_by_signal <- count(auto_geo_list_by_signal, data_source, geos_list, name = "n_signals")
373
+ # # different_geos_by_signal
374
+ # # which(duplicated(select(different_geos_by_signal, data_source)))
375
+
376
+ # # Keep most common geos_list for each data source.
377
+ # most_common_geos_list <- group_by(different_geos_by_signal, data_source) %>%
378
+ # slice_max(n_signals, with_ties = FALSE)
379
+ # # most_common_geos_list
380
+ # leftover_datasource_geos <- anti_join(different_geos_by_signal, most_common_geos_list)
381
+ # # leftover_datasource_geos
382
+ # leftover_signal_geos <- semi_join(auto_geo_list_by_signal, leftover_datasource_geos)
383
+ # # leftover_signal_geos
384
+
385
+ # These values are applied first. They are the default (most common) geos for each data source.
386
+ avail_geos <- c(
387
+ " chng" = glue(" county,hrr,msa,state,hhs,nation" ),
388
+ " covid-act-now" = glue(" county,hrr,msa,state,hhs,nation" ),
389
+ " doctor-visits" = glue(" county,hrr,msa,state,hhs,nation" ),
390
+ " dsew-cpr" = glue(" county,msa,state,hhs,nation" ),
391
+ " fb-survey" = glue(" county,hrr,msa,state,nation" ),
392
+ " ght" = glue(" hrr,msa,dma,state" ),
393
+ " google-survey" = glue(" county,hrr,msa,state" ),
394
+ " google-symptoms" = glue(" county,hrr,msa,state,hhs,nation" ),
395
+ " hhs" = glue(" state,hhs,nation" ),
396
+ " hospital-admissions" = glue(" county,hrr,msa,state,hhs,nation" ),
397
+ " indicator-combination" = glue(" county,hrr,msa,state,hhs,nation" ),
398
+ " jhu-csse" = glue(" county,hrr,msa,state,hhs,nation" ),
399
+ " nchs-mortality" = glue(" state,nation" ),
400
+ # Quidel non-flu signals
401
+ " quidel" = glue(" county,hrr,msa,state,hhs,nation" ),
402
+ " safegraph" = glue(" county,hrr,msa,state,hhs,nation" ),
403
+ " usa-facts" = glue(" county,hrr,msa,state,hhs,nation" ),
404
+ " youtube-survey" = " state"
405
+ )
406
+
407
+ # These are signal-specific geo lists. These are less common and are applied as a patch.
408
+ dsew_geos <- glue(" state,hhs,nation" )
409
+ fb_geos1 <- glue(" county,state,nation" )
410
+ fb_geos2 <- glue(" county,msa,state,nation" )
411
+ hosp_geos <- glue(" county,hrr,msa,state" )
412
+ combo_geos <- glue(" county,msa,state" )
413
+ quidel_geos <- glue(" msa,state" )
414
+ leftover_signal_geos_manual <- tibble :: tribble(
415
+ ~ data_source , ~ signal , ~ geos_list ,
416
+ " chng" , " 7dav_inpatient_covid" , " state" ,
417
+ " chng" , " 7dav_outpatient_covid" , " state" ,
418
+
419
+ " dsew-cpr" , " booster_doses_admin_7dav" , dsew_geos ,
420
+ " dsew-cpr" , " doses_admin_7dav" , dsew_geos ,
421
+ " dsew-cpr" , " people_booster_doses" , dsew_geos ,
422
+
423
+ " fb-survey" , " smoothed_vaccine_barrier_appointment_location_tried" , fb_geos1 ,
424
+ " fb-survey" , " smoothed_vaccine_barrier_other_tried" , fb_geos1 ,
425
+ " fb-survey" , " smoothed_wvaccine_barrier_appointment_location_tried" , fb_geos1 ,
426
+ " fb-survey" , " smoothed_wvaccine_barrier_other_tried" , fb_geos1 ,
427
+
428
+ " fb-survey" , " smoothed_vaccine_barrier_appointment_time_tried" , fb_geos2 ,
429
+ " fb-survey" , " smoothed_vaccine_barrier_childcare_tried" , fb_geos2 ,
430
+ " fb-survey" , " smoothed_vaccine_barrier_document_tried" , fb_geos2 ,
431
+ " fb-survey" , " smoothed_vaccine_barrier_eligible_tried" , fb_geos2 ,
432
+ " fb-survey" , " smoothed_vaccine_barrier_language_tried" , fb_geos2 ,
433
+ " fb-survey" , " smoothed_vaccine_barrier_no_appointments_tried" , fb_geos2 ,
434
+ " fb-survey" , " smoothed_vaccine_barrier_none_tried" , fb_geos2 ,
435
+ " fb-survey" , " smoothed_vaccine_barrier_technical_difficulties_tried" , fb_geos2 ,
436
+ " fb-survey" , " smoothed_vaccine_barrier_technology_access_tried" , fb_geos2 ,
437
+ " fb-survey" , " smoothed_vaccine_barrier_time_tried" , fb_geos2 ,
438
+ " fb-survey" , " smoothed_vaccine_barrier_travel_tried" , fb_geos2 ,
439
+ " fb-survey" , " smoothed_vaccine_barrier_type_tried" , fb_geos2 ,
440
+ " fb-survey" , " smoothed_wvaccine_barrier_appointment_time_tried" , fb_geos2 ,
441
+ " fb-survey" , " smoothed_wvaccine_barrier_childcare_tried" , fb_geos2 ,
442
+ " fb-survey" , " smoothed_wvaccine_barrier_document_tried" , fb_geos2 ,
443
+ " fb-survey" , " smoothed_wvaccine_barrier_eligible_tried" , fb_geos2 ,
444
+ " fb-survey" , " smoothed_wvaccine_barrier_language_tried" , fb_geos2 ,
445
+ " fb-survey" , " smoothed_wvaccine_barrier_no_appointments_tried" , fb_geos2 ,
446
+ " fb-survey" , " smoothed_wvaccine_barrier_none_tried" , fb_geos2 ,
447
+ " fb-survey" , " smoothed_wvaccine_barrier_technical_difficulties_tried" , fb_geos2 ,
448
+ " fb-survey" , " smoothed_wvaccine_barrier_technology_access_tried" , fb_geos2 ,
449
+ " fb-survey" , " smoothed_wvaccine_barrier_time_tried" , fb_geos2 ,
450
+ " fb-survey" , " smoothed_wvaccine_barrier_travel_tried" , fb_geos2 ,
451
+ " fb-survey" , " smoothed_wvaccine_barrier_type_tried" , fb_geos2 ,
452
+
453
+ " hospital-admissions" , " smoothed_adj_covid19" , hosp_geos ,
454
+ " hospital-admissions" , " smoothed_covid19" , hosp_geos ,
455
+
456
+ " indicator-combination" , " nmf_day_doc_fbc_fbs_ght" , combo_geos ,
457
+ " indicator-combination" , " nmf_day_doc_fbs_ght" , combo_geos ,
458
+
459
+ # Quidel flu signals
460
+ " quidel" , " raw_pct_negative" , quidel_geos ,
461
+ " quidel" , " smoothed_pct_negative" , quidel_geos ,
462
+ " quidel" , " raw_tests_per_device" , quidel_geos ,
463
+ " quidel" , " smoothed_tests_per_device" , quidel_geos
464
+ )
369
465
370
- # Tool: Are there any data sources where geos_list is different for different signal?
371
- different_geos_by_signal <- count(auto_geo_list_by_signal , data_source , geos_list , name = " n_signals" )
372
- # different_geos_by_signal
373
- # which(duplicated(select(different_geos_by_signal, data_source)))
466
+ source_updated [, col ] <- coalesce(avail_geos [source_updated $ data_source ], source_updated [[col ]])
467
+
468
+ source_updated <- left_join(
469
+ source_updated , leftover_signal_geos_manual ,
470
+ by = c(" Signal" = " signal" , " data_source" )
471
+ ) %> %
472
+ mutate(`Available Geography` = coalesce(geos_list , `Available Geography` )) %> %
473
+ select(- geos_list )
374
474
375
- # Keep most common geos_list for each data source.
376
- most_common_geos_list <- group_by(different_geos_by_signal , data_source ) %> %
377
- slice_max(n_signals , with_ties = FALSE )
378
- # most_common_geos_list
379
- leftover_datasource_geos <- anti_join(different_geos_by_signal , most_common_geos_list )
380
- # leftover_datasource_geos
381
- leftover_signal_geos <- semi_join(auto_geo_list_by_signal , leftover_datasource_geos )
382
- # leftover_signal_geos
383
475
384
- delphi_agg_text <- " (by Delphi)"
476
+ col <- " Delphi-Aggregated Geography"
477
+ # List available geo-levels that were created by Delphi (as opposed to being
478
+ # ingested directly from the data source), e.g. if available at the county,
479
+ # state, and nation levels but state and nation were aggregated by us from
480
+ # provided county data: state,nation
385
481
386
482
# These values are applied first. They are the default (most common) geos for each data source.
387
483
avail_geos <- c(
388
- " chng" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
389
- " covid-act-now" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
390
- " doctor-visits" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
391
- " dsew-cpr" = glue(" county, msa, state, hhs, nation{delphi_agg_text} " ),
392
- " fb-survey" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " ),
393
- " ght" = glue(" hrr{delphi_agg_text}, msa{delphi_agg_text}, dma, state " ),
394
- " google-survey" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " ),
395
- " google-symptoms" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
396
- " hhs" = glue(" state, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
397
- " hospital-admissions" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
398
- " indicator-combination" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
399
- " jhu-csse" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
400
- " nchs-mortality" = glue( " state, nation " ) ,
484
+ " chng" = glue(" hrr, msa, state, hhs, nation" ),
485
+ " covid-act-now" = glue(" hrr, msa, state, hhs, nation" ),
486
+ " doctor-visits" = glue(" hrr, msa, state, hhs, nation" ),
487
+ " dsew-cpr" = glue(" nation" ),
488
+ " fb-survey" = glue(" county, hrr, msa, state, nation" ),
489
+ " ght" = glue(" hrr, msa" ),
490
+ " google-survey" = glue(" county, hrr, msa, state" ),
491
+ " google-symptoms" = glue(" hrr, msa, hhs, nation" ),
492
+ " hhs" = glue(" hhs, nation" ),
493
+ " hospital-admissions" = glue(" county, hrr, msa, state, hhs, nation" ),
494
+ " indicator-combination" = glue(" county, hrr, msa, state, hhs, nation" ),
495
+ " jhu-csse" = glue(" hrr, msa, state, hhs, nation" ),
496
+ " nchs-mortality" = NA_character_ ,
401
497
# Quidel non-flu signals
402
- " quidel" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
403
- " safegraph" = glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
404
- " usa-facts" = glue(" county, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, hhs{delphi_agg_text}, nation{delphi_agg_text} " ),
405
- " youtube-survey" = " state{delphi_agg_text} "
498
+ " quidel" = glue(" county, hrr, msa, state, hhs, nation" ),
499
+ " safegraph" = glue(" county, hrr, msa, state, hhs, nation" ),
500
+ " usa-facts" = glue(" hrr, msa, state, hhs, nation" ),
501
+ " youtube-survey" = " state"
406
502
)
407
503
408
504
# These are signal-specific geo lists. These are less common and are applied as a patch.
409
- dsew_geos <- glue(" state, hhs, nation{delphi_agg_text} " )
410
- fb_geos1 <- glue(" county{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " )
411
- fb_geos2 <- glue(" county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text}, nation{delphi_agg_text} " )
412
- hosp_geos <- glue(" county{delphi_agg_text}, hrr{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " )
413
- combo_geos <- glue(" county{delphi_agg_text}, msa{delphi_agg_text}, state{delphi_agg_text} " )
414
- quidel_geos <- glue(" msa{delphi_agg_text}, state{delphi_agg_text} " )
505
+ dsew_geos <- glue(" nation" )
506
+ fb_geos1 <- glue(" county, state, nation" )
507
+ fb_geos2 <- glue(" county, msa, state, nation" )
508
+ hosp_geos <- glue(" county, hrr, msa, state" )
509
+ combo_geos <- glue(" county, msa, state" )
510
+ quidel_geos <- glue(" msa, state" )
415
511
leftover_signal_geos_manual <- tibble :: tribble(
416
512
~ data_source , ~ signal , ~ geos_list ,
417
513
" chng" , " 7dav_inpatient_covid" , " state" ,
@@ -470,10 +566,11 @@ source_updated <- left_join(
470
566
source_updated , leftover_signal_geos_manual ,
471
567
by = c(" Signal" = " signal" , " data_source" )
472
568
) %> %
473
- mutate(`Available Geography` = coalesce(geos_list , `Available Geography` )) %> %
569
+ mutate(`Delphi-Aggregated Geography` = coalesce(geos_list , `Delphi-Aggregated Geography` )) %> %
474
570
select(- geos_list )
475
571
476
572
573
+
477
574
# Temporal Scope Start
478
575
# Above. YYYY-MM-DD, with epiweeks as YYYY-WW. Formatted as a string
479
576
0 commit comments