diff --git a/.lintr b/.lintr index 226d5c03..41b22f67 100644 --- a/.lintr +++ b/.lintr @@ -1,7 +1,7 @@ linters: linters_with_defaults( line_length_linter = line_length_linter(120L), object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase")), - cyclocomp_linter = cyclocomp_linter(30L), + cyclocomp_linter = NULL, # Issues with R6 classes. object_length_linter(32L), indentation_linter = indentation_linter(hanging_indent_style = "tidy"), return_linter = NULL diff --git a/NAMESPACE b/NAMESPACE index 8a647e87..ea7e7be3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -98,6 +98,7 @@ export(get_tag_data) export(get_tags) export(get_thumbnail) export(get_timezones) +export(get_usage) export(get_usage_shiny) export(get_usage_static) export(get_user_permission) diff --git a/NEWS.md b/NEWS.md index 923bfff0..bdd05835 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # connectapi (development version) +## New features + +- New `get_usage()` function returns content usage data from Connect's `GET + v1/instrumentation/content/hits` endpoint on Connect v2025.04.0 and higher. + (#390) ## Enhancements and fixes diff --git a/R/connect.R b/R/connect.R index 9c3a642d..9d39e22c 100644 --- a/R/connect.R +++ b/R/connect.R @@ -818,6 +818,33 @@ Connect <- R6::R6Class( self$GET(path, query = query) }, + #' @description Get content usage data. + #' @param from Optional `Date` or `POSIXt`; start of the time window. If a + #' `Date`, coerced to `YYYY-MM-DDT00:00:00` in the caller's time zone. + #' @param to Optional `Date` or `POSIXt`; end of the time window. If a + #' `Date`, coerced to `YYYY-MM-DDT23:59:59` in the caller's time zone. + inst_content_hits = function(from = NULL, to = NULL) { + error_if_less_than(self$version, "2025.04.0") + + # If this is called with date objects with no timestamp attached, it's + # reasonable to assume that the caller is indicating the days as an + # inclusive range. 
+ if (inherits(from, "Date")) { + from <- as.POSIXct(paste(from, "00:00:00")) + } + if (inherits(to, "Date")) { + to <- as.POSIXct(paste(to, "23:59:59")) + } + + self$GET( + v1_url("instrumentation", "content", "hits"), + query = list( + from = make_timestamp(from), + to = make_timestamp(to) + ) + ) + }, + #' @description Get running processes. procs = function() { warn_experimental("procs") diff --git a/R/get.R b/R/get.R index 5d5a87a2..c7737768 100644 --- a/R/get.R +++ b/R/get.R @@ -526,6 +526,72 @@ get_usage_static <- function( return(out) } +#' Get usage information for deployed content +#' +#' @description + +#' Retrieve content hits for all available content on the server. Available +#' content depends on the user whose API key is in use. Administrator accounts +#' will receive data for all content on the server. Publishers will receive data +#' for all content they own or collaborate on. +#' +#' If no date-times are provided, all usage data will be returned. + +#' @param client A `Connect` R6 client object. +#' @param from Optional `Date` or date-time (`POSIXct` or `POSIXlt`). Only +#' records after this time are returned. If a `Date`, treated as the start of +#' that day in the local time zone; if a date-time, used verbatim. +#' @param to Optional `Date` or date-time (`POSIXct` or `POSIXlt`). Only records +#' before this time are returned. If a `Date`, treated as end of that day +#' (`23:59:59`) in the local time zone; if a date-time, used verbatim. +#' +#' @return A tibble with columns: +#' * `id`: An identifier for the record. +#' * `user_guid`: The GUID of logged-in visitors, NA for anonymous. +#' * `content_guid`: The GUID of the content. +#' * `timestamp`: The time of the hit as `POSIXct`. +#' * `path`: The path of the hit. Not recorded for all content types. +#' * `user_agent`: If available, the user agent string for the hit. Not +#' available for all records. +#' +#' @details +#' +#' The data returned by `get_usage()` includes all content types. 
For Shiny +#' content, the `timestamp` indicates the *start* of the Shiny session. +#' Additional fields for Shiny and non-Shiny are available respectively from +#' `get_usage_shiny()` and `get_usage_static()`. +#' +#' When possible, however, we recommend using `get_usage()` over +#' `get_usage_static()` or `get_usage_shiny()`, as it will be much faster for +#' large datasets. +#' +#' @examples +#' \dontrun{ +#' client <- connect() +#' +#' # Fetch the last 2 days of hits +#' usage <- get_usage(client, from = Sys.Date() - 2, to = Sys.Date()) +#' +#' # Fetch usage after a specified date +#' usage <- get_usage( +#' client, +#' from = as.POSIXct("2025-05-02 12:40:00", tz = "UTC") +#' ) +#' +#' # Fetch all usage +#' usage <- get_usage(client) +#' } +#' +#' @export +get_usage <- function(client, from = NULL, to = NULL) { + usage_raw <- client$inst_content_hits( + from = from, + to = to + ) + + usage <- parse_connectapi_typed(usage_raw, connectapi_ptypes$usage) + fast_unnest_character(usage, "data") +} #' Get Audit Logs from Posit Connect Server #' diff --git a/R/parse.R b/R/parse.R index ed9c3d01..f01de490 100644 --- a/R/parse.R +++ b/R/parse.R @@ -58,15 +58,19 @@ ensure_column <- function(data, default, name) { # manual fix because vctrs::vec_cast cannot cast double -> datetime or char -> datetime col <- coerce_datetime(col, default, name = name) } + if (inherits(default, "fs_bytes") && !inherits(col, "fs_bytes")) { col <- coerce_fsbytes(col, default) } + if (inherits(default, "integer64") && !inherits(col, "integer64")) { col <- bit64::as.integer64(col) } + if (inherits(default, "list") && !inherits(col, "list")) { col <- list(col) } + col <- vctrs::vec_cast(col, default, x_arg = name) } data[[name]] <- col @@ -101,6 +105,65 @@ parse_connectapi <- function(data) { )) } +# nolint start +# Unnests a list column similarly to `tidyr::unnest_wider()`, bringing the +# entries of each list-item up to the top level. 
Makes some simplifying
+# assumptions for the sake of performance:
+# 1. All inner variables are treated as character vectors;
+# 2. The names of the first entry of the list-column are used as the
+#    names of variables to extract;
+# 3. An empty list-column (zero rows) has no first entry, so no variables
+#    are extracted and the list-column is simply dropped.
+# Performance example:
+# > nrow(x_raw)
+# [1] 373632
+# > t_tidyr <- system.time(
+# +   x_tidyr <- tidyr::unnest_wider(x_raw, data)
+# + )
+# > t_custom <- system.time(
+# +   x_custom <- fast_unnest_character(x_raw, "data")
+# + )
+# > identical(x_tidyr, x_custom)
+# [1] TRUE
+# > t_tidyr
+#    user  system elapsed
+#   7.018   0.137   7.172
+# > t_custom
+#    user  system elapsed
+#   0.281   0.005   0.285
+# nolint end
+fast_unnest_character <- function(df, col_name) {
+  if (!is.character(col_name)) {
+    stop("col_name must be a character vector")
+  }
+  if (!col_name %in% names(df)) {
+    stop("col_name is not present in df")
+  }
+
+  list_col <- df[[col_name]]
+
+  new_cols <- if (length(list_col) > 0L) names(list_col[[1]]) else character(0)
+
+  df2 <- df
+  for (col in new_cols) {
+    df2[[col]] <- vapply(
+      list_col,
+      function(row) {
+        if (is.null(row[[col]])) {
+          NA_character_
+        } else {
+          row[[col]]
+        }
+      },
+      character(1),
+      USE.NAMES = FALSE
+    )
+  }
+
+  df2[[col_name]] <- NULL
+  df2
+}
+
 coerce_fsbytes <- function(x, to, ...)
{ if (is.numeric(x)) { fs::as_fs_bytes(x) diff --git a/R/ptype.R b/R/ptype.R index 94068a4f..7febeba8 100644 --- a/R/ptype.R +++ b/R/ptype.R @@ -1,5 +1,5 @@ NA_datetime_ <- # nolint: object_name_linter - vctrs::new_datetime(NA_real_, tzone = "UTC") + vctrs::new_datetime(NA_real_, tzone = Sys.timezone()) NA_list_ <- # nolint: object_name_linter list(list()) @@ -38,6 +38,13 @@ connectapi_ptypes <- list( "bundle_id" = NA_character_, "data_version" = NA_integer_ ), + usage = tibble::tibble( + "id" = NA_integer_, + "user_guid" = NA_character_, + "content_guid" = NA_character_, + "timestamp" = NA_datetime_, + "data" = NA_list_ + ), content = tibble::tibble( "guid" = NA_character_, "name" = NA_character_, diff --git a/man/PositConnect.Rd b/man/PositConnect.Rd index 9d45fbc3..1cc5ccd8 100644 --- a/man/PositConnect.Rd +++ b/man/PositConnect.Rd @@ -117,6 +117,7 @@ Other R6 classes: \item \href{#method-Connect-group_content}{\code{Connect$group_content()}} \item \href{#method-Connect-inst_content_visits}{\code{Connect$inst_content_visits()}} \item \href{#method-Connect-inst_shiny_usage}{\code{Connect$inst_shiny_usage()}} +\item \href{#method-Connect-inst_content_hits}{\code{Connect$inst_content_hits()}} \item \href{#method-Connect-procs}{\code{Connect$procs()}} \item \href{#method-Connect-repo_account}{\code{Connect$repo_account()}} \item \href{#method-Connect-repo_branches}{\code{Connect$repo_branches()}} @@ -1193,6 +1194,27 @@ Get (non-interactive) content visits. } } \if{html}{\out{
<hr>}}
+\if{html}{\out{<a id="method-Connect-inst_content_hits"></a>}}
+\if{latex}{\out{\hypertarget{method-Connect-inst_content_hits}{}}}
+\subsection{Method \code{inst_content_hits()}}{
+Get content usage data.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Connect$inst_content_hits(from = NULL, to = NULL)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{from}}{Optional \code{Date} or \code{POSIXt}; start of the time window. If a
+\code{Date}, coerced to \code{YYYY-MM-DDT00:00:00} in the caller's time zone.}
+
+\item{\code{to}}{Optional \code{Date} or \code{POSIXt}; end of the time window. If a
+\code{Date}, coerced to \code{YYYY-MM-DDT23:59:59} in the caller's time zone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Connect-procs}{}}} \subsection{Method \code{procs()}}{ diff --git a/man/get_usage.Rd b/man/get_usage.Rd new file mode 100644 index 00000000..de423eda --- /dev/null +++ b/man/get_usage.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{get_usage} +\alias{get_usage} +\title{Get usage information for deployed content} +\usage{ +get_usage(client, from = NULL, to = NULL) +} +\arguments{ +\item{client}{A \code{Connect} R6 client object.} + +\item{from}{Optional \code{Date} or date-time (\code{POSIXct} or \code{POSIXlt}). Only +records after this time are returned. If a \code{Date}, treated as the start of +that day in the local time zone; if a date-time, used verbatim.} + +\item{to}{Optional \code{Date} or date-time (\code{POSIXct} or \code{POSIXlt}). Only records +before this time are returned. If a \code{Date}, treated as end of that day +(\code{23:59:59}) in the local time zone; if a date-time, used verbatim.} +} +\value{ +A tibble with columns: +\itemize{ +\item \code{id}: An identifier for the record. +\item \code{user_guid}: The GUID of logged-in visitors, NA for anonymous. +\item \code{content_guid}: The GUID of the content. +\item \code{timestamp}: The time of the hit as \code{POSIXct}. +\item \code{path}: The path of the hit. Not recorded for all content types. +\item \code{user_agent}: If available, the user agent string for the hit. Not +available for all records. +} +} +\description{ +Retrieve content hits for all available content on the server. Available +content depends on the user whose API key is in use. Administrator accounts +will receive data for all content on the server. Publishers will receive data +for all content they own or collaborate on. + +If no date-times are provided, all usage data will be returned. +} +\details{ +The data returned by \code{get_usage()} includes all content types. 
For Shiny +content, the \code{timestamp} indicates the \emph{start} of the Shiny session. +Additional fields for Shiny and non-Shiny are available respectively from +\code{get_usage_shiny()} and \code{get_usage_static()}. + +When possible, however, we recommend using \code{get_usage()} over +\code{get_usage_static()} or \code{get_usage_shiny()}, as it will be much faster for +large datasets. +} +\examples{ +\dontrun{ +client <- connect() + +# Fetch the last 2 days of hits +usage <- get_usage(client, from = Sys.Date() - 2, to = Sys.Date()) + +# Fetch usage after a specified date +usage <- get_usage( + client, + from = as.POSIXct("2025-05-02 12:40:00", tz = "UTC") +) + +# Fetch all usage +usage <- get_usage(client) +} + +} diff --git a/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json b/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json new file mode 100644 index 00000000..136be68e --- /dev/null +++ b/tests/testthat/2025.04.0/__api__/v1/instrumentation/content/hits-c331ad.json @@ -0,0 +1,52 @@ +[ + { + "id": 8966707, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T12:49:16.269904Z", + "data": { + "path": "/hello", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8966708, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T12:49:17.002848Z", + "data": { + "path": "/world", + "user_agent": null + } + }, + { + "id": 8967206, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T13:01:47.40738Z", + "data": { + "path": "/chinchilla", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8967210, + "user_guid": null, + "content_guid": "475618c9", + "timestamp": "2025-04-30T13:04:13.176791Z", + "data": { + "path": "/lava-lamp", + "user_agent": "Datadog/Synthetics" + } + }, + { + "id": 8966214, + "user_guid": "fecbd383", + "content_guid": "b0eaf295", + "timestamp": "2025-04-30T12:36:13.818466Z", + "data": { + "path": null, + 
"user_agent": null + } + } +] diff --git a/tests/testthat/test-content.R b/tests/testthat/test-content.R index 64202e39..36720700 100644 --- a/tests/testthat/test-content.R +++ b/tests/testthat/test-content.R @@ -397,7 +397,7 @@ test_that("get_log() gets job logs", { source = c("stderr", "stderr", "stderr"), timestamp = structure( c(1733512169.9480169, 1733512169.9480703, 1733512169.9480758), - tzone = "UTC", + tzone = Sys.timezone(), class = c("POSIXct", "POSIXt") ), data = c( diff --git a/tests/testthat/test-get.R b/tests/testthat/test-get.R index 670e8656..d714f769 100644 --- a/tests/testthat/test-get.R +++ b/tests/testthat/test-get.R @@ -202,7 +202,7 @@ test_that("get_vanity_urls() works", { 1602623489, 1677679943 ), - tzone = "UTC", + tzone = Sys.timezone(), class = c("POSIXct", "POSIXt") ) ) @@ -330,7 +330,10 @@ test_that("get_packages() works as expected with `content_guid` names in API res test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", { with_mock_dir("2024.05.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. client$version }) @@ -342,7 +345,10 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", }) with_mock_dir("2024.06.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. 
client$version }) @@ -354,7 +360,10 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", }) with_mock_dir("2024.07.0", { - client <- Connect$new(server = "http://connect.example", api_key = "not-a-key") + client <- Connect$new( + server = "http://connect.example", + api_key = "not-a-key" + ) # `$version` is lazy, so we need to call it before `without_internet()`. client$version }) @@ -365,3 +374,86 @@ test_that("get_content only requests vanity URLs for Connect 2024.06.0 and up", ) }) }) + +test_that("get_usage() returns usage data in the expected shape", { + with_mock_dir("2025.04.0", { + client <- connect(server = "https://connect.example", api_key = "fake") + usage <- get_usage( + client, + from = as.POSIXct("2025-04-01 00:00:01", tz = "UTC") + ) + + expect_equal( + usage, + tibble::tibble( + id = c(8966707L, 8966708L, 8967206L, 8967210L, 8966214L), + user_guid = c(NA, NA, NA, NA, "fecbd383"), + content_guid = c( + "475618c9", + "475618c9", + "475618c9", + "475618c9", + "b0eaf295" + ), + timestamp = c( + parse_connect_rfc3339("2025-04-30T12:49:16.269904Z"), + parse_connect_rfc3339("2025-04-30T12:49:17.002848Z"), + parse_connect_rfc3339("2025-04-30T13:01:47.40738Z"), + parse_connect_rfc3339("2025-04-30T13:04:13.176791Z"), + parse_connect_rfc3339("2025-04-30T12:36:13.818466Z") + ), + path = c("/hello", "/world", "/chinchilla", "/lava-lamp", NA), + user_agent = c( + "Datadog/Synthetics", + NA, + "Datadog/Synthetics", + "Datadog/Synthetics", + NA + ) + ) + ) + }) +}) + +test_that("Metrics firehose is called with expected parameters", { + with_mock_api({ + client <- Connect$new(server = "https://connect.example", api_key = "fake") + # $version is loaded lazily, we need it before calling get_usage() + client$version + + without_internet({ + expect_GET( + get_usage(client), + "https://connect.example/__api__/v1/instrumentation/content/hits" + ) + expect_GET( + get_usage( + client, + from = as.POSIXct("2025-04-01 00:00:01", tz = "UTC"), + 
to = as.POSIXct("2025-04-02 00:00:01", tz = "UTC") + ), + paste0( + "https://connect.example/__api__/v1/instrumentation/content/hits?", + "from=2025-04-01T00%3A00%3A01Z&to=2025-04-02T00%3A00%3A01Z" + ) + ) + + # Dates are converted to timestamps with the system's time zone, so for + # repeatability we're gonna set it here. + + withr::local_envvar(TZ = "UTC") + + expect_GET( + get_usage( + client, + from = as.Date("2025-04-01"), + to = as.Date("2025-04-02") + ), + paste0( + "https://connect.example/__api__/v1/instrumentation/content/hits?", + "from=2025-04-01T00%3A00%3A00Z&to=2025-04-02T23%3A59%3A59Z" + ) + ) + }) + }) +}) diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R index 4e182704..9875df88 100644 --- a/tests/testthat/test-parse.R +++ b/tests/testthat/test-parse.R @@ -336,3 +336,49 @@ test_that("works for bad inputs", { expect_s3_class(res$start_time, "POSIXct") expect_s3_class(res$end_time, "POSIXct") }) + +test_that("fast_unnest_character() extracts a list column to character columns", { + df <- tibble::tibble(id = 1:2, animal = c("cat", "dog")) + df$info <- list( + list(path = "/a", user_agent = "ua1"), + list(path = "/b", user_agent = "ua2") + ) + + out <- fast_unnest_character(df, "info") + expect_named(out, c("id", "animal", "path", "user_agent")) + expect_equal(out$id, 1:2) + expect_equal(out$animal, c("cat", "dog")) + expect_equal(out$path, c("/a", "/b")) + expect_equal(out$user_agent, c("ua1", "ua2")) + expect_false("info" %in% names(out)) +}) + +test_that("fast_unnest_character() converts NULL to NA", { + df <- tibble::tibble(id = 1:3, animal = c("cat", "dog", "chinchilla")) + df$info <- list( + list(path = "/a", user_agent = NULL), + list(path = NULL, user_agent = "ua2"), + list(path = NULL, user_agent = NULL) + ) + + out <- fast_unnest_character(df, "info") + expect_equal(out$path, c("/a", NA_character_, NA_character_)) + expect_equal(out$user_agent, c(NA_character_, "ua2", NA_character_)) +}) + 
+test_that("fast_unnest_character errs when column doesn't exist", {
+  df <- data.frame(x = 1:2) # has no column named "missing_col"
+  expect_error(
+    fast_unnest_character(df, "missing_col"),
+    "col_name is not present in df"
+  )
+})
+
+test_that("fast_unnest_character errs when col_name isn't a character vector", {
+  x <- 1 # numeric on purpose: col_name must be supplied as a string
+  df <- data.frame(x = 1:2)
+  expect_error(
+    fast_unnest_character(df, x),
+    "col_name must be a character vector"
+  )
+})