@@ -213,13 +213,15 @@ daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_v
213
213
# ' @param epi_arch the archive to aggregate.
214
214
# ' @param agg_columns the columns to aggregate.
215
215
# ' @param agg_method the method to use to aggregate the data, one of "sum" or "mean".
216
- # ' @param day_of_week the day of the week to use as the reference day.
217
- # ' @param day_of_week_end the day of the week to use as the end of the week.
216
+ # ' @param week_reference the day of the week to use as the reference day (Wednesday is default).
217
+ # ' Note that this is 1-indexed, so 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
218
+ # ' @param week_start the day of the week to use as the start of the week (Sunday is default).
219
+ # ' Note that this is 1-indexed, so 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
218
220
daily_to_weekly_archive <- function (epi_arch ,
219
221
agg_columns ,
220
222
agg_method = c(" sum" , " mean" ),
221
- day_of_week = 4L ,
222
- day_of_week_end = 7L ) {
223
+ week_reference = 4L ,
224
+ week_start = 7L ) {
223
225
# How to aggregate the windowed data.
224
226
agg_method <- arg_match(agg_method )
225
227
# The columns we will later group by when aggregating.
@@ -230,67 +232,24 @@ daily_to_weekly_archive <- function(epi_arch,
230
232
sort()
231
233
# Choose a fast function to use to slide and aggregate.
232
234
if (agg_method == " sum" ) {
233
- slide_fun <- epi_slide_sum
235
+ # If the week is complete, this is equivalent to the sum. If the week is not
236
+ # complete, this is equivalent to 7/(number of days in the week) * the sum,
237
+ # which should be a decent approximation.
238
+ agg_fun <- \(x ) 7 * mean(x , na.rm = TRUE )
234
239
} else if (agg_method == " mean" ) {
235
- slide_fun <- epi_slide_mean
240
+ agg_fun <- \( x ) mean( x , na.rm = TRUE )
236
241
}
237
242
# Slide over the versions and aggregate.
238
243
epix_slide(
239
244
epi_arch ,
240
245
.versions = ref_time_values ,
241
246
function (x , group_keys , ref_time ) {
242
- # The last day of the week we will slide over.
243
- ref_time_last_week_end <- floor_date(ref_time , " week" , day_of_week_end - 1 )
244
-
245
- # To find the days we will slide over, we need to find the first and last
246
- # complete weeks of data. Get the max and min times, and then find the
247
- # first and last complete weeks of data.
248
- min_time <- min(x $ time_value )
249
- max_time <- max(x $ time_value )
250
-
251
- # Let's determine if the min and max times are in the same week.
252
- ceil_min_time <- ceiling_date(min_time , " week" , week_start = day_of_week_end - 1 )
253
- ceil_max_time <- ceiling_date(max_time , " week" , week_start = day_of_week_end - 1 )
254
-
255
- # If they're not in the same week, this means we have at least one
256
- # complete week of data to slide over.
257
- if (ceil_min_time < ceil_max_time ) {
258
- valid_slide_days <- seq.Date(
259
- from = ceiling_date(min_time , " week" , week_start = day_of_week_end - 1 ),
260
- to = floor_date(max_time , " week" , week_start = day_of_week_end - 1 ),
261
- by = 7L
262
- )
263
- } else {
264
- # This is the degenerate case, where we have about 1 week or less of
265
- # data. In this case, we opt to return nothing for two reasons:
266
- # 1. in most cases here, the data is incomplete for a single week,
267
- # 2. if the data is complete, a single week of data is not enough to
268
- # reasonably perform any kind of aggregation.
269
- return (tibble())
270
- }
271
-
272
- # If the last day of the week is not the end of the week, add it to the
273
- # list of valid slide days (this will produce an incomplete slide, but
274
- # that's fine for us, since it should only be 1 day, historically.)
275
- if (wday(max_time ) != day_of_week_end ) {
276
- valid_slide_days <- c(valid_slide_days , max_time )
277
- }
278
-
279
247
# Slide over the days and aggregate.
280
248
x %> %
281
- group_by(across(all_of(keys ))) %> %
282
- slide_fun(
283
- agg_columns ,
284
- .window_size = 7L ,
285
- na.rm = TRUE ,
286
- .ref_time_values = valid_slide_days
287
- ) %> %
288
- select(- all_of(agg_columns )) %> %
289
- rename_with(~ gsub(" slide_value_" , " " , .x )) %> %
290
- rename_with(~ gsub(" _7dsum" , " " , .x )) %> %
291
- # Round all dates to reference day of the week. These will get
292
- # de-duplicated by compactify in as_epi_archive below.
293
- mutate(time_value = round_date(time_value , " week" , day_of_week - 1 )) %> %
249
+ mutate(week_start = ceiling_date(time_value , " week" , week_start = week_start )- 1 ) %> %
250
+ summarize(across(all_of(agg_columns ), agg_fun ), .by = all_of(c(keys , " week_start" ))) %> %
251
+ mutate(time_value = round_date(week_start , " week" , week_reference - 1 )) %> %
252
+ select(- week_start ) %> %
294
253
as_tibble()
295
254
}
296
255
) %> %
0 commit comments