diff --git a/.lintr b/.lintr index 226d5c031..41b22f67d 100644 --- a/.lintr +++ b/.lintr @@ -1,7 +1,7 @@ linters: linters_with_defaults( line_length_linter = line_length_linter(120L), object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase")), - cyclocomp_linter = cyclocomp_linter(30L), + cyclocomp_linter = NULL, # Issues with R6 classes. object_length_linter(32L), indentation_linter = indentation_linter(hanging_indent_style = "tidy"), return_linter = NULL diff --git a/DESCRIPTION b/DESCRIPTION index a3906bb4d..ba602beed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,6 +57,7 @@ Suggests: rsconnect, spelling, testthat, + tidyr, webshot2, withr VignetteBuilder: diff --git a/NAMESPACE b/NAMESPACE index 2c799d1ed..30118f21e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,9 @@ S3method("[",connect_tag_tree) S3method("[[",connect_tag_tree) S3method(api_build,op_base_connect) S3method(api_build,op_head) +S3method(as.data.frame,connect_list_hits) S3method(as.data.frame,tbl_connect) +S3method(as_tibble,connect_list_hits) S3method(connect_vars,op_base) S3method(connect_vars,op_single) S3method(connect_vars,tbl_connect) @@ -97,6 +99,7 @@ export(get_tag_data) export(get_tags) export(get_thumbnail) export(get_timezones) +export(get_usage) export(get_usage_shiny) export(get_usage_static) export(get_user_permission) @@ -159,6 +162,7 @@ importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,arg_match) importFrom(rlang,is_string) +importFrom(tibble,as_tibble) importFrom(utils,browseURL) importFrom(utils,capture.output) importFrom(utils,compareVersion) diff --git a/NEWS.md b/NEWS.md index 5942eed28..b31cd8fae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # connectapi (development version) +## New features + +- New `get_usage()` function returns content usage data from Connect's `GET + v1/instrumentation/content/hits` endpoint on Connect v2025.04.0 and higher. 
+ (#390) ## Enhancements and fixes diff --git a/R/connect.R b/R/connect.R index 5dd08546a..55a8a30b2 100644 --- a/R/connect.R +++ b/R/connect.R @@ -858,7 +858,9 @@ Connect <- R6::R6Class( docs = function(docs = "api", browse = TRUE) { stopifnot(docs %in% c("admin", "user", "api")) url <- paste0(self$server, "/__docs__/", docs) - if (browse) utils::browseURL(url) + if (browse) { + utils::browseURL(url) + } url }, diff --git a/R/get.R b/R/get.R index 6275b1152..cc3fa78d7 100644 --- a/R/get.R +++ b/R/get.R @@ -526,6 +526,157 @@ get_usage_static <- function( return(out) } +#' Get usage information for deployed content +#' +#' @description +#' Retrieve content hits for all available content on the server. Available +#' content depends on the user whose API key is in use. Administrator accounts +#' will receive data for all content on the server. Publishers will receive data +#' for all content they own or collaborate on. +#' +#' If no date-times are provided, all usage data will be returned. +#' +#' @param client A `Connect` R6 client object. +#' @param from Optional date-time (`POSIXct` or `POSIXlt`). Only +#' records after this time are returned. If not provided, records +#' are returned back to the first record available. +#' @param to Optional date-time (`POSIXct` or `POSIXlt`). Only records +#' before this time are returned. If not provided, all records up to +#' the most recent are returned. +#' +#' @return A list of usage records. Each record is a list with all elements +#' as character strings unless otherwise specified. +#' +#' * `id`: An integer identifier for the hit. +#' * `user_guid`: The user GUID if the visitor is logged-in, `NULL` for +#' anonymous hits. +#' * `content_guid`: The GUID of the visited content. +#' * `timestamp`: The time of the hit in RFC3339 format. +#' * `data`: A nested list with optional fields: +#' * `path`: The request path (if recorded). +#' * `user_agent`: The user agent string (if available). 
+#' +#' Use [as.data.frame()] or [tibble::as_tibble()] to convert to a flat +#' table with parsed types. In the resulting data frame: +#' +#' * `timestamp` is parsed to `POSIXct`. +#' * `path` and `user_agent` are extracted from the nested `data` field. +#' +#' By default, [as.data.frame()] attempts to extract the nested fields using +#' the \pkg{tidyr} package. If \pkg{tidyr} is not available, or if you want to +#' skip unnesting, call `as.data.frame(x, unnest = FALSE)` to leave `data` as +#' a list-column. +#' +#' @details +#' +#' The data returned by `get_usage()` includes all content types. For Shiny +#' content, the `timestamp` indicates the *start* of the Shiny session. +#' Additional fields for Shiny and non-Shiny content are available respectively from +#' [get_usage_shiny()] and [get_usage_static()]. `get_usage_shiny()` includes a +#' field for the session end time; `get_usage_static()` includes variant, +#' rendering, and bundle identifiers for the visited content. +#' +#' When possible, however, we recommend using `get_usage()` over +#' `get_usage_static()` or `get_usage_shiny()`, as it is faster and more efficient. +#' +#' @seealso [as.data.frame.connect_list_hits()], [as_tibble.connect_list_hits()] +#' +#' @examples +#' \dontrun{ +#' client <- connect() +#' +#' # Fetch the last 2 days of hits +#' usage <- get_usage(client, from = Sys.Date() - 2, to = Sys.Date()) +#' +#' # Fetch usage after a specified date-time.
+#' usage <- get_usage( +#' client, +#' from = as.POSIXct("2025-05-02 12:40:00", tz = "UTC") +#' ) +#' +#' # Fetch all usage +#' usage <- get_usage(client) +#' +#' # Convert to tibble or data frame +#' usage_df <- tibble::as_tibble(usage) +#' +#' # Skip unnesting if tidyr is not installed +#' usage_df <- as.data.frame(usage, unnest = FALSE) +#' } +#' +#' @export +get_usage <- function(client, from = NULL, to = NULL) { + error_if_less_than(client$version, "2025.04.0") + + usage <- client$GET( + v1_url("instrumentation", "content", "hits"), + query = list( + from = make_timestamp(from), + to = make_timestamp(to) + ) + ) + + class(usage) <- c("connect_list_hits", class(usage)) + usage +} + +#' Convert usage data to a data frame +#' +#' @description +#' Converts an object returned by [get_usage()] into a data frame with parsed +#' column types. By default, extracts `path` and `user_agent` from the `data` +#' field, if available. +#' +#' @param x A `connect_list_hits` object (from [get_usage()]). +#' @param row.names Passed to [base::as.data.frame()]. +#' @param optional Passed to [base::as.data.frame()]. +#' @param ... Passed to [base::as.data.frame()]. +#' @param unnest Logical; if `TRUE` (default), extracts nested fields using +#' \pkg{tidyr}. Set to `FALSE` to skip unnesting. +#' +#' @return A `data.frame` with one row per usage record. +#' @export +#' @method as.data.frame connect_list_hits +as.data.frame.connect_list_hits <- function( + x, + row.names = NULL, # nolint + optional = FALSE, + ..., + unnest = TRUE +) { + usage_df <- parse_connectapi_typed(x, connectapi_ptypes$usage) + if (unnest) { + if (!requireNamespace("tidyr", quietly = TRUE)) { + stop( + "`unnest = TRUE` requires tidyr. Install tidyr or set `unnest = FALSE`.", + call. = FALSE + ) + } + usage_df <- tidyr::unnest_wider( + usage_df, + "data", + ptype = list(path = character(0), user_agent = character(0)) + ) + } + as.data.frame(usage_df, row.names = row.names, optional = optional, ...) 
+} + +#' Convert usage data to a tibble +#' +#' @description +#' Converts an object returned by [get_usage()] to a tibble via +#' [as.data.frame.connect_list_hits()]. +#' +#' @param x A `connect_list_hits` object. +#' @param ... Passed to [as.data.frame()]. +#' +#' @return A tibble with one row per usage record. +#' @export +#' @importFrom tibble as_tibble +#' @method as_tibble connect_list_hits +as_tibble.connect_list_hits <- function(x, ...) { + tibble::as_tibble(as.data.frame(x, ...)) +} #' Get Audit Logs from Posit Connect Server #' diff --git a/R/parse.R b/R/parse.R index ed9c3d01b..22d582fef 100644 --- a/R/parse.R +++ b/R/parse.R @@ -58,15 +58,19 @@ ensure_column <- function(data, default, name) { # manual fix because vctrs::vec_cast cannot cast double -> datetime or char -> datetime col <- coerce_datetime(col, default, name = name) } + if (inherits(default, "fs_bytes") && !inherits(col, "fs_bytes")) { col <- coerce_fsbytes(col, default) } + if (inherits(default, "integer64") && !inherits(col, "integer64")) { col <- bit64::as.integer64(col) } + if (inherits(default, "list") && !inherits(col, "list")) { col <- list(col) } + col <- vctrs::vec_cast(col, default, x_arg = name) } data[[name]] <- col @@ -101,6 +105,7 @@ parse_connectapi <- function(data) { )) } + coerce_fsbytes <- function(x, to, ...) 
{ if (is.numeric(x)) { fs::as_fs_bytes(x) diff --git a/R/ptype.R b/R/ptype.R index 94068a4f8..7febeba8d 100644 --- a/R/ptype.R +++ b/R/ptype.R @@ -1,5 +1,5 @@ NA_datetime_ <- # nolint: object_name_linter - vctrs::new_datetime(NA_real_, tzone = "UTC") + vctrs::new_datetime(NA_real_, tzone = Sys.timezone()) NA_list_ <- # nolint: object_name_linter list(list()) @@ -38,6 +38,13 @@ connectapi_ptypes <- list( "bundle_id" = NA_character_, "data_version" = NA_integer_ ), + usage = tibble::tibble( + "id" = NA_integer_, + "user_guid" = NA_character_, + "content_guid" = NA_character_, + "timestamp" = NA_datetime_, + "data" = NA_list_ + ), content = tibble::tibble( "guid" = NA_character_, "name" = NA_character_, diff --git a/_pkgdown.yml b/_pkgdown.yml index 00aa56801..d8f943c84 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -67,6 +67,8 @@ reference: Helpers to "get" data out of Connect contents: - starts_with("get") + - as.data.frame.connect_list_hits + - as_tibble.connect_list_hits - title: "Other" desc: > diff --git a/man/as.data.frame.connect_list_hits.Rd b/man/as.data.frame.connect_list_hits.Rd new file mode 100644 index 000000000..7bf650065 --- /dev/null +++ b/man/as.data.frame.connect_list_hits.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{as.data.frame.connect_list_hits} +\alias{as.data.frame.connect_list_hits} +\title{Convert usage data to a data frame} +\usage{ +\method{as.data.frame}{connect_list_hits}(x, row.names = NULL, optional = FALSE, ..., unnest = TRUE) +} +\arguments{ +\item{x}{A \code{connect_list_hits} object (from \code{\link[=get_usage]{get_usage()}}).} + +\item{row.names}{Passed to \code{\link[base:as.data.frame]{base::as.data.frame()}}.} + +\item{optional}{Passed to \code{\link[base:as.data.frame]{base::as.data.frame()}}.} + +\item{...}{Passed to \code{\link[base:as.data.frame]{base::as.data.frame()}}.} + +\item{unnest}{Logical; if \code{TRUE} (default), extracts nested fields using 
+\pkg{tidyr}. Set to \code{FALSE} to skip unnesting.} +} +\value{ +A \code{data.frame} with one row per usage record. +} +\description{ +Converts an object returned by \code{\link[=get_usage]{get_usage()}} into a data frame with parsed +column types. By default, extracts \code{path} and \code{user_agent} from the \code{data} +field, if available. +} diff --git a/man/as_tibble.connect_list_hits.Rd b/man/as_tibble.connect_list_hits.Rd new file mode 100644 index 000000000..7f3221dcf --- /dev/null +++ b/man/as_tibble.connect_list_hits.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{as_tibble.connect_list_hits} +\alias{as_tibble.connect_list_hits} +\title{Convert usage data to a tibble} +\usage{ +\method{as_tibble}{connect_list_hits}(x, ...) +} +\arguments{ +\item{x}{A \code{connect_list_hits} object.} + +\item{...}{Passed to \code{\link[=as.data.frame]{as.data.frame()}}.} +} +\value{ +A tibble with one row per usage record. +} +\description{ +Converts an object returned by \code{\link[=get_usage]{get_usage()}} to a tibble via +\code{\link[=as.data.frame.connect_list_hits]{as.data.frame.connect_list_hits()}}. +} diff --git a/man/get_usage.Rd b/man/get_usage.Rd new file mode 100644 index 000000000..403721299 --- /dev/null +++ b/man/get_usage.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{get_usage} +\alias{get_usage} +\title{Get usage information for deployed content} +\usage{ +get_usage(client, from = NULL, to = NULL) +} +\arguments{ +\item{client}{A \code{Connect} R6 client object.} + +\item{from}{Optional date-time (\code{POSIXct} or \code{POSIXlt}). Only +records after this time are returned. If not provided, records +are returned back to the first record available.} + +\item{to}{Optional date-time (\code{POSIXct} or \code{POSIXlt}). Only records +before this time are returned. 
If not provided, all records up to +the most recent are returned.} +} +\value{ +A list of usage records. Each record is a list with all elements +as character strings unless otherwise specified. +\itemize{ +\item \code{id}: An integer identifier for the hit. +\item \code{user_guid}: The user GUID if the visitor is logged-in, \code{NULL} for +anonymous hits. +\item \code{content_guid}: The GUID of the visited content. +\item \code{timestamp}: The time of the hit in RFC3339 format. +\item \code{data}: A nested list with optional fields: +\itemize{ +\item \code{path}: The request path (if recorded). +\item \code{user_agent}: The user agent string (if available). +} +} + +Use \code{\link[=as.data.frame]{as.data.frame()}} or \code{\link[tibble:as_tibble]{tibble::as_tibble()}} to convert to a flat +table with parsed types. In the resulting data frame: +\itemize{ +\item \code{timestamp} is parsed to \code{POSIXct}. +\item \code{path} and \code{user_agent} are extracted from the nested \code{data} field. +} + +By default, \code{\link[=as.data.frame]{as.data.frame()}} attempts to extract the nested fields using +the \pkg{tidyr} package. If \pkg{tidyr} is not available, or if you want to +skip unnesting, call \code{as.data.frame(x, unnest = FALSE)} to leave \code{data} as +a list-column. +} +\description{ +Retrieve content hits for all available content on the server. Available +content depends on the user whose API key is in use. Administrator accounts +will receive data for all content on the server. Publishers will receive data +for all content they own or collaborate on. + +If no date-times are provided, all usage data will be returned. +} +\details{ +The data returned by \code{get_usage()} includes all content types. For Shiny +content, the \code{timestamp} indicates the \emph{start} of the Shiny session. 
+Additional fields for Shiny and non-Shiny content are available respectively from +\code{\link[=get_usage_shiny]{get_usage_shiny()}} and \code{\link[=get_usage_static]{get_usage_static()}}. \code{get_usage_shiny()} includes a +field for the session end time; \code{get_usage_static()} includes variant, +rendering, and bundle identifiers for the visited content. + +When possible, however, we recommend using \code{get_usage()} over +\code{get_usage_static()} or \code{get_usage_shiny()}, as it is faster and more efficient. +} +\examples{ +\dontrun{ +client <- connect() + +# Fetch the last 2 days of hits +usage <- get_usage(client, from = Sys.Date() - 2, to = Sys.Date()) + +# Fetch usage after a specified date-time. +usage <- get_usage( + client, + from = as.POSIXct("2025-05-02 12:40:00", tz = "UTC") +) + +# Fetch all usage +usage <- get_usage(client) + +# Convert to tibble or data frame +usage_df <- tibble::as_tibble(usage) + +# Skip unnesting if tidyr is not installed +usage_df <- as.data.frame(usage, unnest = FALSE) +} + +} +\seealso{ +\code{\link[=as.data.frame.connect_list_hits]{as.data.frame.connect_list_hits()}}, \code{\link[=as_tibble.connect_list_hits]{as_tibble.connect_list_hits()}} +} diff --git a/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json b/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json new file mode 100644 index 000000000..136be68ed --- /dev/null +++ b/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json @@ -0,0 +1,52 @@ +[ + { + "id": 8966707, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T12:49:16.269904Z", + "data": { + "path": "/hello", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8966708, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T12:49:17.002848Z", + "data": { + "path": "/world", + "user_agent": null + } + }, + { + "id": 8967206, + "user_guid": null, + "content_guid":
"475618c9", + "timestamp": "2025-04-30T13:01:47.40738Z", + "data": { + "path": "/chinchilla", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8967210, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T13:04:13.176791Z", + "data": { + "path": "/lava-lamp", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8966214, + "user_guid": "fecbd383", + "content_guid": "b0eaf295", + "timestamp": "2025-04-30T12:36:13.818466Z", + "data": { + "path": null, + "user_agent": null + } + } +] diff --git a/tests/testthat/test-content.R b/tests/testthat/test-content.R index 73a861804..eb29015e0 100644 --- a/tests/testthat/test-content.R +++ b/tests/testthat/test-content.R @@ -421,7 +421,7 @@ test_that("get_log() gets job logs", { source = c("stderr", "stderr", "stderr"), timestamp = structure( c(1733512169.9480169, 1733512169.9480703, 1733512169.9480758), - tzone = "UTC", + tzone = Sys.timezone(), class = c("POSIXct", "POSIXt") ), data = c( diff --git a/tests/testthat/test-get.R b/tests/testthat/test-get.R index 670e8656b..3bb7d14ac 100644 --- a/tests/testthat/test-get.R +++ b/tests/testthat/test-get.R @@ -202,7 +202,7 @@ test_that("get_vanity_urls() works", { 1602623489, 1677679943 ), - tzone = "UTC", + tzone = Sys.timezone(), class = c("POSIXct", "POSIXt") ) ) @@ -330,7 +330,10 @@ test_that("get_packages() works as expected with `content_guid` names in API res test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", { with_mock_dir("2024.05.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. 
client$version }) @@ -342,7 +345,10 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", }) with_mock_dir("2024.06.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. client$version }) @@ -354,7 +360,10 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", }) with_mock_dir("2024.07.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. client$version }) @@ -365,3 +374,98 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", ) }) }) + +with_mock_dir("2025.04.0", { + test_that("get_usage() returns usage data in the expected shape", { + client <- connect(server = "https://connect.example", api_key = "fake") + usage <- get_usage( + client, + from = as.POSIXct("2025-04-01 00:00:01", tz = "UTC") + ) + + expect_s3_class(usage, "connect_list_hits") + expect_s3_class(usage, "list") + + expect_length(usage, 5) + + # Check first element + expect_equal( + usage[[1]], + list( + id = 8966707L, + user_guid = NULL, + content_guid = "475618c9", + timestamp = "2025-04-30T12:49:16.269904Z", + data = list( + path = "/hello", + user_agent = "Datadog/Synthetics" + ) + ) + ) + + # Check conversion to data.frame + usage_df <- as.data.frame(usage) + expect_equal( + usage_df, + data.frame( + id = c(8966707L, 8966708L, 8967206L, 8967210L, 8966214L), + user_guid = c(NA, NA, NA, NA, "fecbd383"), + content_guid = c( + "475618c9", + "475618c9", + "475618c9", + "475618c9", + "b0eaf295" + ), + timestamp = c( + parse_connect_rfc3339(c( + "2025-04-30T12:49:16.269904Z", + "2025-04-30T12:49:17.002848Z", + 
"2025-04-30T13:01:47.40738Z", + "2025-04-30T13:04:13.176791Z", + "2025-04-30T12:36:13.818466Z" + )) + ), + path = c("/hello", "/world", "/chinchilla", "/lava-lamp", NA), + user_agent = c( + "Datadog/Synthetics", + NA, + "Datadog/Synthetics", + "Datadog/Synthetics", + NA + ) + ) + ) + + # Check conversion with unnest=FALSE + usage_df_no_unnest <- as.data.frame(usage, unnest = FALSE) + expect_equal( + names(usage_df_no_unnest), + c("id", "user_guid", "content_guid", "timestamp", "data") + ) + }) + + test_that("Metrics firehose is called with expected parameters", { + client <- Connect$new(server = "https://connect.example", api_key = "fake") + # $version is loaded lazily, we need it before calling get_usage() + client$version + + without_internet({ + expect_GET( + get_usage(client), + "https://connect.example/__api__/v1/instrumentation/content/hits" + ) + expect_GET( + get_usage( + client, + from = as.POSIXct("2025-04-01 00:00:01", tz = "UTC"), + to = as.POSIXct("2025-04-02 00:00:01", tz = "UTC") + ), + paste0( + "https://connect.example/__api__/v1/instrumentation/content/hits?", + "from=2025-04-01T00%3A00%3A01Z&to=2025-04-02T00%3A00%3A01Z" + ) + ) + }) + }) +})