@@ -338,57 +338,74 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
338
338
}
339
339
340
340
# ' group col by .idx values and sum windows around each .idx value
341
- # ' @param .idx the relevant periodic part of time value, e.g. the week number
342
- # ' @param col the list of values indexed by `.idx`
343
- # ' @param weights how much to weigh each particular datapoint
344
- # ' @param aggr the aggregation function, probably Quantile, mean or median
341
+ # ' @param idx_in the relevant periodic part of time value, e.g. the week number,
342
+ # ' limited to the relevant range
343
+ # ' @param col the list of values indexed by `idx_in`
344
+ # ' @param weights how much to weigh each particular datapoint (also indexed by
345
+ # ' `idx_in`)
346
+ # ' @param aggr the aggregation function, probably Quantile, mean, or median
345
347
# ' @param window_size the number of .idx entries before and after to include in
346
348
# ' the aggregation
347
- # ' @param modulus the maximum value of `.idx`
349
+ # ' @param modulus the number of days/weeks/months in the year, not including any
350
+ # ' leap days/weeks
348
351
# ' @importFrom lubridate %m-%
349
352
# ' @keywords internal
350
- roll_modular_multivec <- function (col , .idx , weights , aggr , window_size , modulus ) {
351
- tib <- tibble(col = col , weights = weights , .idx = .idx ) | >
353
+ roll_modular_multivec <- function (col , idx_in , weights , aggr , window_size , modulus ) {
354
+ # make a tibble where data gives the list of all datapoints with the
355
+ # corresponding .idx
356
+ tib <- tibble(col = col , weights = weights , .idx = idx_in ) | >
352
357
arrange(.idx ) | >
353
358
tidyr :: nest(data = c(col , weights ), .by = .idx )
354
- out <- double(modulus + 1 )
355
- for (iter in seq_along(out )) {
356
- # +1 from 1-indexing
357
- entries <- (iter - window_size ): (iter + window_size ) %% modulus
358
- entries [entries == 0 ] <- modulus
359
- # note that because we are 1-indexing, we're looking for indices that are 1
360
- # larger than the actual day/week in the year
361
- if (modulus == 365 ) {
362
- # we need to grab just the window around the leap day on the leap day
363
- if (iter == 366 ) {
364
- # there's an extra data point in front of the leap day
365
- entries <- (59 - window_size ): (59 + window_size - 1 ) %% modulus
366
- entries [entries == 0 ] <- modulus
367
- # adding in the leap day itself
368
- entries <- c(entries , 999 )
369
- } else if ((59 %in% entries ) || (60 %in% entries )) {
370
- # if we're on the Feb/March boundary for daily data, we need to add in the
371
- # leap day data
372
- entries <- c(entries , 999 )
373
- }
374
- } else if (modulus == 52 ) {
375
- # we need to grab just the window around the leap week on the leap week
376
- if (iter == 53 ) {
377
- entries <- (53 - window_size ): (53 + window_size - 1 ) %% 52
378
- entries [entries == 0 ] <- 52
379
- entries <- c(entries , 999 )
380
- } else if ((52 %in% entries ) || (1 %in% entries )) {
381
- # if we're on the year boundary for weekly data, we need to add in the
382
- # leap week data (which is the extra week at the end)
383
- entries <- c(entries , 999 )
384
- }
385
- }
386
- out [iter ] <- with(
359
+ # storage for the results, includes all possible time indexes
360
+ out <- tibble(.idx = c(1 : modulus , 999 ), climate_pred = double(modulus + 1 ))
361
+ for (tib_idx in tib $ .idx ) {
362
+ entries <- within_window(tib_idx , window_size , modulus )
363
+ out $ climate_pred [out $ .idx == tib_idx ] <- with(
387
364
purrr :: list_rbind(tib %> % filter(.idx %in% entries ) %> % pull(data )),
388
365
aggr(col , weights )
389
366
)
390
367
}
391
- tibble(.idx = unique(tib $ .idx ), climate_pred = out [seq_len(nrow(tib ))])
368
+ # filter to only the ones we actually computed
369
+ out %> % filter(.idx %in% idx_in )
370
+ }
371
+
372
+ # ' generate the idx values within `window_size` of `target_idx` given that our
373
+ # ' time value is of the type matching modulus
374
+ # ' @param target_idx the time index which we're drawing the window around
375
+ # ' @param window_size the size of the window on one side of `target_idx`
376
+ # ' @param modulus the number of days/weeks/months in the year, not including any leap days/weeks
377
+ # ' @keywords internal
378
+ within_window <- function (target_idx , window_size , modulus ) {
379
+ entries <- (target_idx - window_size ): (target_idx + window_size ) %% modulus
380
+ entries [entries == 0 ] <- modulus
381
+ # note that because we are 1-indexing, we're looking for indices that are 1
382
+ # larger than the actual day/week in the year
383
+ if (modulus == 365 ) {
384
+ # we need to grab just the window around the leap day on the leap day
385
+ if (target_idx == 999 ) {
386
+ # there's an extra data point in front of the leap day
387
+ entries <- (59 - window_size ): (59 + window_size - 1 ) %% modulus
388
+ entries [entries == 0 ] <- modulus
389
+ # adding in the leap day itself
390
+ entries <- c(entries , 999 )
391
+ } else if ((59 %in% entries ) || (60 %in% entries )) {
392
+ # if we're on the Feb/March boundary for daily data, we need to add in the
393
+ # leap day data
394
+ entries <- c(entries , 999 )
395
+ }
396
+ } else if (modulus == 52 ) {
397
+ # we need to grab just the window around the leap week on the leap week
398
+ if (target_idx == 999 ) {
399
+ entries <- (53 - window_size ): (53 + window_size - 1 ) %% 52
400
+ entries [entries == 0 ] <- 52
401
+ entries <- c(entries , 999 )
402
+ } else if ((52 %in% entries ) || (1 %in% entries )) {
403
+ # if we're on the year boundary for weekly data, we need to add in the
404
+ # leap week data (which is the extra week at the end)
405
+ entries <- c(entries , 999 )
406
+ }
407
+ }
408
+ entries
392
409
}
393
410
394
411
0 commit comments