Skip to content

Commit b9a1b7c

Browse files
authored
feat: add ignore_cache to fetch_args_list (#301)
* feat: add refresh_cache to fetch_args_list * fix: only_supports_classic was parsed incorrectly * fix: clear_cache reuses previous set_cache settings * feat: add is_cache_enabled() * refactor: cache_epidata_call integrated into fetch() * refactor: removed fetch_tbl() and integrated into fetch() * test: fix cache tests to not clear user cache * ci: update old actions * doc: comment cacheable functions
1 parent 929f8ac commit b9a1b7c

15 files changed

+237
-187
lines changed

.github/workflows/test-coverage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,4 @@ jobs:
5353
path: ${{ runner.temp }}/package
5454

5555
- name: Upload coverage reports to Codecov
56-
uses: codecov/codecov-action@v3
56+
uses: codecov/codecov-action@v5

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ URL: https://cmu-delphi.github.io/epidatr/,
3030
https://cmu-delphi.github.io/delphi-epidata/,
3131
https://github.com/cmu-delphi/epidatr
3232
BugReports: https://github.com/cmu-delphi/epidatr/issues
33-
Depends:
33+
Depends:
3434
R (>= 3.5.0)
3535
Imports:
3636
cachem,
@@ -58,9 +58,9 @@ Suggests:
5858
rmarkdown,
5959
testthat (>= 3.1.5),
6060
withr
61-
VignetteBuilder:
61+
VignetteBuilder:
6262
knitr
63-
Remotes:
63+
Remotes:
6464
cmu-delphi/delphidocs
6565
Config/Needs/website: cmu-delphi/delphidocs
6666
Config/testthat/edition: 3

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import(cachem)
5353
import(glue)
5454
importFrom(MMWRweek,MMWRweek)
5555
importFrom(MMWRweek,MMWRweek2Date)
56+
importFrom(cachem,is.key_missing)
5657
importFrom(checkmate,assert)
5758
importFrom(checkmate,assert_character)
5859
importFrom(checkmate,assert_integerish)
@@ -80,7 +81,6 @@ importFrom(magrittr,"%>%")
8081
importFrom(openssl,md5)
8182
importFrom(purrr,map_chr)
8283
importFrom(purrr,map_lgl)
83-
importFrom(readr,read_csv)
8484
importFrom(stats,na.omit)
8585
importFrom(tibble,as_tibble)
8686
importFrom(tibble,tibble)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
- Support more date formats in function to convert dates to epiweeks. Use `parse_api_date` since it already supports both common formats. #276
1313
- `EPIDATR_USE_CACHE` only supported exactly "TRUE" before. Now it supports all logical values and includes a warning when any value that can't be converted to logical is provided. #273
1414
- `missing` doesn't count default values as non-missing. If a user doesn't pass `geo_values` or `time_values` (both of which default to `"*"` in `pub_covidcast`), or `dates` (in `pub_covid_hosp_state_timeseries`), the missing check fails. To avoid this, just don't check missingness of those two arguments.
15+
- `fetch_args_list` now has an `refresh_cache` argument, which is `FALSE` by default.
1516

1617
# epidatr 1.1.1
1718

R/cache.R

Lines changed: 89 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
cache_environ <- new.env(parent = emptyenv())
44
cache_environ$use_cache <- NULL
55
cache_environ$epidatr_cache <- NULL
6+
cache_environ$cache_args <- NULL
67

78
#' Create or renew a cache for this session
89
#' @aliases set_cache
@@ -169,6 +170,12 @@ set_cache <- function(cache_dir = NULL,
169170
max_age = days * 24 * 60 * 60,
170171
logfile = file.path(cache_dir, logfile)
171172
)
173+
cache_environ$cache_args <- list2(
174+
cache_dir = cache_dir,
175+
days = days,
176+
max_size = max_size,
177+
logfile = logfile
178+
)
172179
}
173180

174181
cli::cli_inform(c(
@@ -183,9 +190,9 @@ set_cache <- function(cache_dir = NULL,
183190
#' Manually reset the cache, deleting all currently saved data and starting afresh
184191
#' @description
185192
#' Deletes the current cache and resets a new cache. Deletes local data! If you
186-
#' are using a session unique cache, you will have to pass the arguments you
187-
#' used for `set_cache` earlier, otherwise the system-wide `.Renviron`-based
188-
#' defaults will be used.
193+
#' are using a session unique cache, the previous settings will be reused. If
194+
#' you pass in new `set_cache` arguments, they will take precedence over the
195+
#' previous settings.
189196
#' @param disable instead of setting a new cache, disable caching entirely;
190197
#' defaults to `FALSE`
191198
#' @inheritDotParams set_cache
@@ -198,11 +205,22 @@ set_cache <- function(cache_dir = NULL,
198205
clear_cache <- function(..., disable = FALSE) {
199206
if (any(!is.na(cache_environ$epidatr_cache))) {
200207
cache_environ$epidatr_cache$destroy()
208+
recovered_args <- cache_environ$cache_args
209+
cache_environ$cache_args <- NULL
210+
} else {
211+
recovered_args <- list()
201212
}
213+
args <- rlang::dots_list(
214+
...,
215+
confirm = FALSE,
216+
!!!recovered_args,
217+
.homonyms = "first",
218+
.ignore_empty = "all"
219+
)
202220
if (disable) {
203221
cache_environ$epidatr_cache <- NULL
204222
} else {
205-
set_cache(...)
223+
rlang::inject(set_cache(!!!args))
206224
}
207225
}
208226

@@ -234,68 +252,85 @@ disable_cache <- function() {
234252
#' disable without deleting
235253
#' @export
236254
cache_info <- function() {
237-
if (is.null(cache_environ$epidatr_cache)) {
238-
return("there is no cache")
239-
} else {
255+
if (is_cache_enabled()) {
240256
return(cache_environ$epidatr_cache$info())
257+
} else {
258+
return("there is no cache")
241259
}
242260
}
243261

244-
#' Dispatch caching
262+
#' Check if the cache is enabled
263+
#' @keywords internal
264+
is_cache_enabled <- function() {
265+
!is.null(cache_environ$epidatr_cache)
266+
}
267+
268+
#' Helper that checks whether a call is actually cachable
269+
#'
270+
#' The cacheable endpoints are those with `as_of` or `issues` parameters:
271+
#' - pub_covidcast
272+
#' - pub_covid_hosp_state_timeseries
273+
#' - pub_ecdc_ili
274+
#' - pub_flusurv
275+
#' - pub_fluview_clinical
276+
#' - pub_fluview
277+
#' - pub_kcdc_ili
278+
#' - pub_nidss_flu
279+
#' - pub_paho_dengue
280+
#'
281+
#' @keywords internal
282+
check_is_cachable <- function(epidata_call, fetch_args) {
283+
as_of_cachable <- !is.null(epidata_call$params$as_of) && !identical(epidata_call$params$as_of, "*")
284+
issues_cachable <- !is.null(epidata_call$params$issues) && !identical(epidata_call$params$issues, "*")
285+
is_cachable <- (
286+
# Cache should be enabled
287+
is_cache_enabled() &&
288+
# Call should be cachable
289+
(as_of_cachable || issues_cachable) &&
290+
# This should not be a dry run
291+
!fetch_args$dry_run &&
292+
# Base url should be null
293+
is.null(fetch_args$base_url) &&
294+
# Don't cache debug calls
295+
!fetch_args$debug &&
296+
# Format type should be json
297+
fetch_args$format_type == "json" &&
298+
# Fields should be null
299+
is.null(fetch_args$fields) &&
300+
# Disable date parsing should be false
301+
!fetch_args$disable_date_parsing &&
302+
# Disable data frame parsing should be false
303+
!fetch_args$disable_data_frame_parsing &&
304+
# Refresh cache should be false
305+
fetch_args$refresh_cache == FALSE
306+
)
307+
return(is_cachable)
308+
}
309+
310+
#' Check for warnings for the cache
245311
#'
246312
#' @description
247-
#' The guts of caching, its interposed between fetch and the specific fetch
248-
#' methods. Internal method only.
313+
#' Adds warnings when arguments are potentially too recent to use with the cache.
249314
#'
250315
#' @param epidata_call the `epidata_call` object
251316
#' @param fetch_args the args list for fetch as generated by [`fetch_args_list()`]
252317
#' @keywords internal
253-
#' @importFrom openssl md5
254-
cache_epidata_call <- function(epidata_call, fetch_args = fetch_args_list()) {
255-
is_cachable <- check_is_cachable(epidata_call, fetch_args)
256-
if (is_cachable) {
257-
target <- request_url(epidata_call)
258-
hashed <- md5(target)
259-
cached <- cache_environ$epidatr_cache$get(hashed)
260-
as_of_recent <- check_is_recent(epidata_call$params$as_of, 7)
261-
issues_recent <- check_is_recent(epidata_call$params$issues, 7)
262-
if (as_of_recent || issues_recent) {
263-
cli::cli_warn(
264-
c(
265-
"Using cached results with `as_of` within the past week (or the future!).
318+
check_for_cache_warnings <- function(epidata_call, fetch_args) {
319+
as_of_recent <- check_is_recent(epidata_call$params$as_of, 7)
320+
issues_recent <- check_is_recent(epidata_call$params$issues, 7)
321+
if (as_of_recent || issues_recent) {
322+
cli::cli_warn(
323+
c(
324+
"Using cached results with `as_of` within the past week (or the future!).
266325
This will likely result in an invalid cache. Consider",
267-
"i" = "disabling the cache for this session with `disable_cache` or
326+
"i" = "disabling the cache for this session with `disable_cache` or
268327
permanently with environmental variable `EPIDATR_USE_CACHE=FALSE`",
269-
"i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS
328+
"i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS
270329
', unset = 1)}` to e.g. `3/24` (3 hours)."
271-
),
272-
.frequency = "regularly",
273-
.frequency_id = "cache timing issues",
274-
class = "cache_recent_data"
275-
)
276-
}
277-
if (!is.key_missing(cached)) {
278-
cli::cli_warn(
279-
c(
280-
"Loading from the cache at {cache_environ$epidatr_cache$info()$dir};
281-
see {cache_environ$epidatr_cache$info()$logfile} for more details."
282-
),
283-
.frequency = "regularly",
284-
.frequency_id = "using the cache",
285-
class = "cache_access"
286-
)
287-
return(cached[[1]])
288-
}
289-
}
290-
# need to actually get the data, since its either not in the cache or we're not caching
291-
runtime <- system.time(if (epidata_call$only_supports_classic) {
292-
fetched <- fetch_classic(epidata_call, fetch_args)
293-
} else {
294-
fetched <- fetch_tbl(epidata_call, fetch_args)
295-
})
296-
# add it to the cache if appropriate
297-
if (is_cachable) {
298-
cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime))
330+
),
331+
.frequency = "regularly",
332+
.frequency_id = "cache timing issues",
333+
class = "cache_recent_data"
334+
)
299335
}
300-
return(fetched)
301336
}

R/epidatacall.R

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ print.epidata_call <- function(x, ...) {
154154
#' @param debug if `TRUE`, return the raw response from the API
155155
#' @param format_type the format to request from the API, one of classic, json,
156156
#' csv; this is only used by `fetch_debug`, and by default is `"json"`
157+
#' @param refresh_cache if `TRUE`, ignore the cache, fetch the data from the
158+
#' API, and update the cache, if it is enabled
157159
#' @return A `fetch_args` object containing all the specified options
158160
#' @export
159161
#' @aliases fetch_args
@@ -168,7 +170,8 @@ fetch_args_list <- function(
168170
base_url = NULL,
169171
dry_run = FALSE,
170172
debug = FALSE,
171-
format_type = c("json", "classic", "csv")) {
173+
format_type = c("json", "classic", "csv"),
174+
refresh_cache = FALSE) {
172175
rlang::check_dots_empty()
173176

174177
assert_character(fields, null.ok = TRUE, any.missing = FALSE)
@@ -180,6 +183,7 @@ fetch_args_list <- function(
180183
assert_logical(dry_run, null.ok = FALSE, len = 1L, any.missing = TRUE)
181184
assert_logical(debug, null.ok = FALSE, len = 1L, any.missing = FALSE)
182185
format_type <- match.arg(format_type)
186+
assert_logical(refresh_cache, null.ok = FALSE, len = 1L, any.missing = FALSE)
183187

184188
structure(
185189
list(
@@ -191,7 +195,8 @@ fetch_args_list <- function(
191195
base_url = base_url,
192196
dry_run = dry_run,
193197
debug = debug,
194-
format_type = format_type
198+
format_type = format_type,
199+
refresh_cache = refresh_cache
195200
),
196201
class = "fetch_args"
197202
)
@@ -219,6 +224,9 @@ print.fetch_args <- function(x, ...) {
219224
#' - For `fetch`: a tibble or a JSON-like list
220225
#' @export
221226
#' @include cache.R
227+
#' @importFrom openssl md5
228+
#' @importFrom cachem is.key_missing
229+
#' @importFrom tibble tibble as_tibble
222230
#'
223231
fetch <- function(epidata_call, fetch_args = fetch_args_list()) {
224232
stopifnot(inherits(epidata_call, "epidata_call"))
@@ -228,48 +236,49 @@ fetch <- function(epidata_call, fetch_args = fetch_args_list()) {
228236
epidata_call <- with_base_url(epidata_call, fetch_args$base_url)
229237
}
230238

239+
# Just display the epidata_call object, don't fetch the data
231240
if (fetch_args$dry_run) {
232241
return(epidata_call)
233242
}
234243

244+
# Just display the raw response from the API, don't parse
235245
if (fetch_args$debug) {
236246
return(fetch_debug(epidata_call, fetch_args))
237247
}
238248

239-
cache_epidata_call(epidata_call, fetch_args = fetch_args)
240-
}
241-
242-
#' Fetches the data and returns a tibble
243-
#' @rdname fetch_tbl
244-
#'
245-
#' @param epidata_call an instance of `epidata_call`
246-
#' @param fetch_args a `fetch_args` object
247-
#' @importFrom readr read_csv
248-
#' @importFrom httr stop_for_status content
249-
#' @importFrom tibble as_tibble tibble
250-
#' @return
251-
#' - For `fetch_tbl`: a [`tibble::tibble`]
252-
#' @keywords internal
253-
fetch_tbl <- function(epidata_call, fetch_args = fetch_args_list()) {
254-
stopifnot(inherits(epidata_call, "epidata_call"))
255-
stopifnot(inherits(fetch_args, "fetch_args"))
249+
# Check if the data is cachable
250+
is_cachable <- check_is_cachable(epidata_call, fetch_args)
251+
if (is_cachable) {
252+
check_for_cache_warnings(epidata_call, fetch_args)
256253

257-
if (epidata_call$only_supports_classic) {
258-
cli::cli_abort(
259-
c(
260-
"This endpoint only supports the classic message format, due to non-standard behavior.
261-
Use fetch_classic instead."
262-
),
263-
epidata_call = epidata_call,
264-
class = "only_supports_classic_format"
265-
)
254+
# Check if the data is in the cache
255+
target <- request_url(epidata_call)
256+
hashed <- md5(target)
257+
cached <- cache_environ$epidatr_cache$get(hashed)
258+
if (!is.key_missing(cached)) {
259+
return(cached[[1]]) # extract `fetched` from `fetch()`, no metadata
260+
}
266261
}
267262

268-
response_content <- fetch_classic(epidata_call, fetch_args = fetch_args)
269-
if (fetch_args$return_empty && length(response_content) == 0) {
270-
return(tibble())
263+
# Need to actually get the data, since its either not in the cache or we're not caching
264+
runtime <- system.time(if (epidata_call$only_supports_classic) {
265+
fetch_args[["disable_data_frame_parsing"]] <- TRUE
266+
fetched <- fetch_classic(epidata_call, fetch_args)
267+
} else {
268+
response_content <- fetch_classic(epidata_call, fetch_args = fetch_args)
269+
if (fetch_args$return_empty && length(response_content) == 0) {
270+
fetched <- tibble()
271+
} else {
272+
fetched <- parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble()
273+
}
274+
})
275+
276+
# Add it to the cache if appropriate
277+
if (is_cachable || (fetch_args$refresh_cache && is_cache_enabled())) {
278+
cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime))
271279
}
272-
return(parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble())
280+
281+
return(fetched)
273282
}
274283

275284
#' Fetches the data, raises on epidata errors, and returns the results as a
@@ -303,6 +312,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) {
303312
)
304313
}
305314
}
315+
306316
if (response_content$message != "success") {
307317
cli::cli_warn(
308318
c(
@@ -311,6 +321,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) {
311321
class = "epidata_warning"
312322
)
313323
}
324+
314325
return(response_content$epidata)
315326
}
316327

0 commit comments

Comments
 (0)