|
| 1 | +library(epiprocess) |
| 2 | + |
| 3 | +hhs_arch <- read_csv("https://healthdata.gov/resource/g62h-syeh.csv?$limit=90000&$select=date,state,previous_day_admission_influenza_confirmed") %>% |
| 4 | + mutate( |
| 5 | + geo_value = tolower(state), |
| 6 | + time_value = as.Date(date) - 1L, |
| 7 | + hhs = previous_day_admission_influenza_confirmed |
| 8 | + ) |> |
| 9 | + select(geo_value, time_value, hhs) |> |
| 10 | + as_epi_df() |> |
| 11 | + group_by(geo_value) |> |
| 12 | + epi_slide_sum( |
| 13 | + hhs, |
| 14 | + na.rm = TRUE, |
| 15 | + .window_size = 7L, |
| 16 | + .ref_time_values = seq.Date(as.Date("2020-01-04"), as.Date("2024-04-24"), by = 7), |
| 17 | + ) |> |
| 18 | + mutate(hhs = hhs_7dsum, hhs_7dsum = NULL) |
| 19 | + |
| 20 | +convert_epiweek_to_season <- function(epiyear, epiweek) { |
| 21 | + # Convert epiweek to season |
| 22 | + update_inds <- epiweek <= 39 |
| 23 | + epiyear <- ifelse(update_inds, epiyear - 1, epiyear) |
| 24 | + |
| 25 | + season <- paste0(epiyear, "/", substr((epiyear + 1), 3, 4)) |
| 26 | + return(season) |
| 27 | +} |
| 28 | + |
| 29 | +epiweeks_in_year <- function(year) { |
| 30 | + last_week_of_year <- seq.Date(as.Date(paste0(year, "-12-24")), |
| 31 | + as.Date(paste0(year, "-12-31")), |
| 32 | + by = 1 |
| 33 | + ) |
| 34 | + return(max(as.numeric(MMWRweek::MMWRweek(last_week_of_year)$MMWRweek))) |
| 35 | +} |
| 36 | + |
| 37 | +convert_epiweek_to_season_week <- function(epiyear, epiweek, season_start = 39) { |
| 38 | + season_week <- epiweek - 39 |
| 39 | + update_inds <- season_week <= 0 |
| 40 | + if (!any(update_inds)) { |
| 41 | + # none need to be updated |
| 42 | + return(season_week) |
| 43 | + } |
| 44 | + # last year's # of epiweeks determines which week in the season we're at at |
| 45 | + # the beginning of the year |
| 46 | + season_week[update_inds] <- season_week[update_inds] + |
| 47 | + sapply(epiyear[update_inds] - 1, epiweeks_in_year) |
| 48 | + |
| 49 | + return(season_week) |
| 50 | +} |
| 51 | + |
| 52 | +df <- readr::read_csv("https://data.cdc.gov/resource/ua7e-t2fy.csv?$limit=20000&$select=weekendingdate,jurisdiction,totalconfflunewadm") |
| 53 | +df <- df %>% |
| 54 | + mutate( |
| 55 | + epiweek = epiweek(weekendingdate), |
| 56 | + epiyear = epiyear(weekendingdate) |
| 57 | + ) %>% |
| 58 | + left_join( |
| 59 | + (.) %>% |
| 60 | + distinct(epiweek, epiyear) %>% |
| 61 | + mutate( |
| 62 | + season = convert_epiweek_to_season(epiyear, epiweek), |
| 63 | + season_week = convert_epiweek_to_season_week(epiyear, epiweek) |
| 64 | + ), |
| 65 | + by = c("epiweek", "epiyear") |
| 66 | + ) |
| 67 | + |
| 68 | + |
| 69 | +to_compare <- df %>% |
| 70 | + mutate(time_value = as.Date(weekendingdate), geo_value = tolower(jurisdiction), nhsn = totalconfflunewadm) %>% |
| 71 | + select(-weekendingdate, -jurisdiction, -totalconfflunewadm) %>% |
| 72 | + full_join(hhs_arch, by = join_by(geo_value, time_value)) %>% |
| 73 | + select(time_value, geo_value, old_source = nhsn, new_source = hhs) |
| 74 | + |
| 75 | +saveRDS( |
| 76 | + df |> |
| 77 | + mutate(time_value = as.Date(weekendingdate), geo_value = tolower(jurisdiction), nhsn = totalconfflunewadm) %>% |
| 78 | + select(-weekendingdate, -jurisdiction, -totalconfflunewadm), |
| 79 | + here::here("_data", "climatological_model_data.rds") |
| 80 | +) |
| 81 | +saveRDS(to_compare, here::here("_data", "hhs_v_nhsn.rds")) |
| 82 | + |
0 commit comments