vignette hack

OHDSI · Jan 6, 2025 · e69c5c7 · e69c5c7
1 parent d4c482c
commit e69c5c7
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 54 deletions.
diff --git a/R/Analyses.R b/R/Analyses.R
@@ -95,21 +95,23 @@ loadSccAnalysisList <- function(file) {
 #' @details
 #' Create a hypothesis of interest, to be used with the \code{\link{runSccAnalyses}} function.
 #'
-#' @param exposureId        A concept ID indentifying the drug of interest in the exposure table. If
-#'                          multiple strategies for picking the exposure will be tested in the analysis, a
-#'                          named list of numbers can be provided instead. In the analysis, the name of the
-#'                          number to be used can be specified using the \code{exposureType} parameter in
-#'                          the \code{\link{createSccAnalysis}} function.
-#' @param outcomeId         A concept ID indentifying the outcome of interest in the outcome table. If
-#'                          multiple strategies for picking the outcome will be tested in the analysis, a
-#'                          named list of numbers can be provided instead. In the analysis, the name of the
-#'                          number to be used can be specified using the #' \code{outcomeType} parameter in
-#'                          the \code{\link{createSccAnalysis}} function.
-#' @param trueEffectSize    Should this be set to 1 this will be considererd a negative control
+#' @param exposureId       A concept ID indentifying the drug of interest in the exposure table. If
+#'                         multiple strategies for picking the exposure will be tested in the analysis,
+#'                         a named list of numbers can be provided instead. In the analysis, the name
+#'                         of the number to be used can be specified using the \code{exposureType}
+#'                         parameter in the \code{\link{createSccAnalysis}} function.
+#' @param outcomeId        A concept ID indentifying the outcome of interest in the outcome table. If
+#'                         multiple strategies for picking the outcome will be tested in the analysis,
+#'                         a named list of numbers can be provided instead. In the analysis, the name
+#'                         of the number to be used can be specified using the #' \code{outcomeType}
+#'                         parameter in the \code{\link{createSccAnalysis}} function.
+#' @param trueEffectSize   Should this be set to 1 this will be considererd a negative control
 #'
 #' @export
-createExposureOutcome <- function(exposureId, outcomeId,  trueEffectSize = NA) {
-  exposureOutcome <- list(exposureId = exposureId, outcomeId = outcomeId, trueEffectSize = trueEffectSize)
+createExposureOutcome <- function(exposureId, outcomeId, trueEffectSize = NA) {
+  exposureOutcome <- list(exposureId = exposureId,
+                          outcomeId = outcomeId,
+                          trueEffectSize = trueEffectSize)
   class(exposureOutcome) <- "exposureOutcome"
   return(exposureOutcome)
 }

diff --git a/R/Calibration.R b/R/Calibration.R
@@ -27,29 +27,31 @@ getNullDist <- function(negatives) {
 #' Compute calibrated rows
 #' @description
 #' Actual calibration is performed here in a dplyr friendly way
-#' @param positives this is the cohort set that should be calibrated
-#' @param negatives these are the negative control cohort results
-#' @param idCol - either target_cohort_id or outcome_cohort_id - to keep
-#' @return data.frame
+#' @param positives   this is the cohort set that should be calibrated
+#' @param negatives   these are the negative control cohort results
+#' @param idCol       - either target_cohort_id or outcome_cohort_id - to keep
+#' @return
+#' data.frame
 #' @noRd
-computeCalibratedRows <- function(positives,
-                                  negatives,
-                                  idCol = NULL,
-                                  keepCols = c("cPt", "cAtRisk", "cCases", "tCases", "tAtRisk")) {
+computeCalibratedRows <- function(positives, negatives, idCol = NULL, keepCols = c("cPt", "cAtRisk",
+  "cCases", "tCases", "tAtRisk")) {
   checkmate::assertDataFrame(positives, col.names = "named")
   checkmate::assertNames(names(positives), must.include = c("rr", "seLogRr", keepCols, idCol))
   nullDist <- getNullDist(negatives)
   errorModel <- EmpiricalCalibration::convertNullToErrorModel(nullDist)
-  ci <- EmpiricalCalibration::calibrateConfidenceInterval(log(positives$rr), positives$seLogRr, errorModel)
+  ci <- EmpiricalCalibration::calibrateConfidenceInterval(log(positives$rr),
+                                                          positives$seLogRr,
+                                                          errorModel)
 
   # Row matches fields in the database excluding the ids, used in dplyr, group_by with keep_true
-  result <- tibble::tibble(pValue = EmpiricalCalibration::calibrateP(nullDist, log(positives$rr), positives$seLogRr),
-                           ub95 = exp(ci$logUb95Rr),
-                           lb95 = exp(ci$logLb95Rr),
-                           rr = exp(ci$logRr),
-                           seLogRr = ci$seLogRr)
+  result <- tibble::tibble(pValue = EmpiricalCalibration::calibrateP(nullDist,
+                                                                     log(positives$rr),
+                                                                     positives$seLogRr),
+    ub95 = exp(ci$logUb95Rr), lb95 = exp(ci$logLb95Rr), rr = exp(ci$logRr), seLogRr = ci$seLogRr)
 
-  keptColumns <- positives |> dplyr::select(dplyr::all_of(c(keepCols, idCol)))
-  result <- result |> dplyr::bind_cols(keptColumns)
+  keptColumns <- positives |>
+    dplyr::select(dplyr::all_of(c(keepCols, idCol)))
+  result <- result |>
+    dplyr::bind_cols(keptColumns)
   return(result)
-}
+}
diff --git a/R/CreateArgFunctions.R b/R/CreateArgFunctions.R
@@ -15,8 +15,8 @@
 #'                                       is 'yyyymmdd'.
 #' @param studyEndDate                   Date for maximum allowable data for index exposure. Dateformat
 #'                                       is 'yyyymmdd'.
-#' @param addLengthOfExposureExposed     If TRUE, use the duration from drugEraStart -> drugEraEnd
-#'                                       as part of timeAtRisk.
+#' @param addLengthOfExposureExposed     If TRUE, use the duration from drugEraStart -> drugEraEnd as
+#'                                       part of timeAtRisk.
 #' @param riskWindowStartExposed         Integer of days to add to drugEraStart for start oftimeAtRisk
 #'                                       (0 to include index date, 1 to start the dayafter).
 #' @param riskWindowEndExposed           Additional window to add to end of exposure period
@@ -33,10 +33,10 @@
 #'                                       and unexposed.
 #' @param washoutPeriod                  Integer to define required time observed before exposurestart.
 #' @param followupPeriod                 Integer to define required time observed after exposurestart.
-#' @param computeTarDistribution         If TRUE, computer the distribution of time-at-risk and
-#'                                       average absolute time between treatment and outcome. Note,
-#'                                       may add significant computation time on some database
-#'                                       engines. If set true in one analysis will default to true for all others.
+#' @param computeTarDistribution         If TRUE, computer the distribution of time-at-risk and average
+#'                                       absolute time between treatment and outcome. Note, may add
+#'                                       significant computation time on some database engines. If set
+#'                                       true in one analysis will default to true for all others.
 #'
 #' @export
 createRunSelfControlledCohortArgs <- function(firstExposureOnly = TRUE,

diff --git a/R/ResultsDataModel.R b/R/ResultsDataModel.R
@@ -128,7 +128,7 @@ migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = ""
 #' @description
 #'
 #' Returns ResultModelManager DataMigrationsManager instance.
-# '@seealso [ResultModelManager::DataMigrationManager] which this function is a utility for.
+#' @seealso [ResultModelManager::DataMigrationManager] which this function is a utility for.
 #'
 #' @param connectionDetails             DatabaseConnector connection details object
 #' @param databaseSchema                String schema where database schema lives

diff --git a/vignettes/UsingSelfControlledCohort.Rmd b/vignettes/UsingSelfControlledCohort.Rmd
@@ -139,7 +139,7 @@ association between every drug and every condition in a CDM database.
 The Eunomia package provides a mini CDM that runs entirely within R that
 we can use for examples.
 
-```{r, include=FALSE}
+```{r, include=FALSE, eval=F}
 library(SelfControlledCohort)
 library(Eunomia)
 
@@ -149,13 +149,14 @@ result <- runSelfControlledCohort(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
   exposureIds = '', # include all drugs as exposures
-  outcomeIds = '' # include all conditions as outcomes
+  outcomeIds = '', # include all conditions as outcomes
+  resultExportPath = "scc_result"
 )
 ```
 
-```{r, message=F}
+```{r, eval=F, message=F}
 library(dplyr)
-result$estimates %>% 
+estimates %>%
   arrange(desc(irr)) %>% 
   head()
 ```
@@ -165,8 +166,8 @@ exposure-outcome pair along with all the relevant statistics. Let's
 interpret the results for amoxicillin exposure (concept ID 1713332) and
 the outcome of Chronic sinusitis (concept ID 257012)
 
-```{r}
-example <- result$estimates %>% 
+```{r eval=F}
+example <- estimates %>%
   filter(exposureId == 1713332, outcomeId == 257012) 
 
 example %>% 
@@ -217,7 +218,7 @@ schema in your CDM database.
 We will simulate using cohorts created in Atlas by using the pre-built
 cohorts in Eunomia.
 
-```{r, include=FALSE}
+```{r, eval=F, include=FALSE}
 Eunomia::createCohorts(connectionDetails)
 ```
 
@@ -226,19 +227,22 @@ outcome of GiBleed (cohort \#3). Even though this is a drug-condition
 pair these cohorts could be defined using combinations of data elements
 from any domain.
 
-```{r, include=FALSE}
+```{r, eval=F, include=FALSE}
 
-result <- runSelfControlledCohort(
+runSelfControlledCohort(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
   exposureIds = c(1,4), # The NSAIDs cohort #4 and the Celecoxib cohort #1
   outcomeIds = 3, # The GiBleed cohort #3
   exposureTable = "cohort",
   outcomeTable = "cohort",
+  resultExportPath = "scc_result"
 )
+
+estimates <- read.csv("results/scc_result")
 ```
 
-```{r}
+```{r eval=F}
 result$estimates %>% 
   filter(exposureId == 4, outcomeId == 3) %>% 
   tidyr::gather() %>% 
@@ -259,7 +263,7 @@ Let's demonstrate custom exposure outcome pairs using SQL by asking "Do
 patients tend to get more measurements in the year after a condition
 diagnosis than the year before?"
 
-```{r}
+```{r eval=F}
 con <- DatabaseConnector::connect(connectionDetails)
 ```
 
@@ -286,7 +290,7 @@ from condition_occurrence
 ```
 
 ```{r, include=FALSE}
-result <- runSelfControlledCohort(
+runSelfControlledCohort(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
   exposureIds = '', # use all rows in exposure table
@@ -299,14 +303,15 @@ result <- runSelfControlledCohort(
   riskWindowEndExposed = 365,
   riskWindowEndUnexposed = -1,
   riskWindowStartUnexposed = -365,
+  resultExportPath = "scc_result"
 )
 ```
 
 Using the `riskWindow` arguments we can set the time at risk to one year
 before and one year after each condition exposure.
 
-```{r}
-result$estimates %>% 
+```{r eval=F}
+estimates %>% 
   tidyr::gather() %>% 
   mutate(value = format(round(value,1), scientific = F)) %>% 
   rename(column = key) %>% 
@@ -328,7 +333,7 @@ We will create one analysis that uses 30 day exposure windows and another with
 365 day exposure windows.
 
 
-```{r}
+```{r eval=F}
 	
 sccArgs1 <- createRunSelfControlledCohortArgs(firstExposureOnly = TRUE,
                                               firstOutcomeOnly = TRUE,
@@ -387,7 +392,7 @@ be concept IDs if you are using the condition_occurrence and drug_era tables or
 cohort IDs if you are using a cohort table for exposures and outcomes. 
 
 
-```{r}
+```{r eval=F}
 
 exposureOutcomeList <- list(createExposureOutcome(exposureId = 4, outcomeId = 3),
                             createExposureOutcome(exposureId = 1, outcomeId = 3))
@@ -398,7 +403,7 @@ exposureOutcomeList <- list(createExposureOutcome(exposureId = 4, outcomeId = 3)
 The total number of rate ratios will be `length(sccAnalysisList) * length(exposureOutcomesList)`
 since every analysis will be executed for every exposure-outcome pair.
 
-```{r}
+```{r eval=F}
 	
 results <- runSccAnalyses(connectionDetails,
                           cdmDatabaseSchema = "main",