LegendMedCentral/dbPaper.rmd

---
csl: pnas.csl

output:
  pdf_document:
    fig_caption: yes
  html_document: default
bibliography: bibliography.bib
params:
  databaseId: "MDCR"
  targetId: 739138
  comparatorId: 715259
  outcomeId: 18
  setTitle: "A Comparison of Sertraline to Duloxetine for the Risk of Stroke in the MDCD Database."
title: "`r params$setTitle`"
---


```{r, echo=FALSE, message=FALSE, comment=FALSE, warning=FALSE, results='hide'}
library(DatabaseConnector)
library(knitr)
library(kableExtra)
source("DataPulls.R")
source("PlotsAndTables.R")
options(knitr.kable.NA = '')

# params <- list(databaseId = "MDCR",
#                targetId = 739138,
#                comparatorId =  715259,
#                outcomeId = 18)

useStoredObject <- FALSE

if (!useStoredObject) {
  # connectionDetails <- createConnectionDetails(dbms = "postgresql",
  #                                              server = "localhost/ohdsi",
  #                                              user = "postgres",
  #                                              password = Sys.getenv("pwPostgres"),
  #                                              schema = "legend")
  connectionDetails <- createConnectionDetails(dbms = "postgresql",
                                               server = paste(Sys.getenv("legendServer"), Sys.getenv("legendDatabase"), sep = "/"),
                                               port = Sys.getenv("legendPort"),
                                               user = Sys.getenv("legendUser"),
                                               password = Sys.getenv("legendPw"),
                                               schema = Sys.getenv("legendSchema"))
  connection <- connect(connectionDetails)
  targetName <- getExposureName(connection = connection, exposureId = params$targetId)
  comparatorName <- getExposureName(connection = connection, exposureId = params$comparatorId)
  outcomeName <- getOutcomeName(connection = connection, outcomeId = params$outcomeId)
  analyses <- getAnalyses(connection = connection)
  databaseDetails <- getDatabaseDetails(connection = connection,
                                        databaseId = params$databaseId)
  studyPeriod <- getStudyPeriod(connection = connection,
                                targetId = params$targetId,
                                comparatorId = params$comparatorId,
                                databaseId = params$databaseId)
  mainResults <- getMainResults(connection = connection,
                                targetIds = params$targetId,
                                comparatorIds = params$comparatorId,
                                outcomeIds = params$outcomeId,
                                databaseIds = params$databaseId,
                                analysisIds = c(1, 2, 3, 4))
  
  subgroupResults <- getSubgroupResults(connection = connection,
                                        targetIds = params$targetId,
                                        comparatorIds = params$comparatorId,
                                        outcomeIds = params$outcomeId,
                                        databaseIds = params$databaseId)
  
  controlResults <- getControlResults(connection = connection,
                                      targetId = params$targetId,
                                      comparatorId = params$comparatorId,
                                      analysisId = 1,
                                      databaseId = params$databaseId)
  
  attrition <- getAttrition(connection = connection,
                            targetId = params$targetId,
                            comparatorId = params$comparatorId,
                            outcomeId = params$outcomeId,
                            analysisId = 1,
                            databaseId = params$databaseId)
  
  followUpDist <- getCmFollowUpDist(connection = connection,
                                    targetId = params$targetId,
                                    comparatorId = params$comparatorId,
                                    outcomeId = params$outcomeId,
                                    databaseId = params$databaseId,
                                    analysisId = 1)
  
  balance <- getCovariateBalance(connection = connection,
                                 targetId = params$targetId,
                                 comparatorId = params$comparatorId,
                                 databaseId = params$databaseId,
                                 analysisId = 2)
  
  popCharacteristics <- getCovariateBalance(connection = connection,
                                            targetId = params$targetId,
                                            comparatorId = params$comparatorId,
                                            databaseId = params$databaseId,
                                            analysisId = 1,
                                            outcomeId = params$outcomeId)
  
  ps <- getPs(connection = connection,
              targetIds = params$targetId,
              comparatorIds = params$comparatorId,
              databaseId = params$databaseId)
  
  kaplanMeier <- getKaplanMeier(connection = connection,
                                targetId = params$targetId,
                                comparatorId = params$comparatorId,
                                outcomeId = params$outcomeId,
                                databaseId = params$databaseId,
                                analysisId = 2)
} else {
  load("paperData.rda")
}
```

\centerline{Martijn J. Schuemie$^{1,2}$}
\centerline{Marc A. Suchard$^{1,3,4,5}$}
\centerline{George M. Hripcsak$^{1,6}$}
\centerline{Patrick B. Ryan$^{1,2,6}$}
\centerline{David Madigan$^{1,7}$}

$^{1}$ Observational Health Data Sciences and Informatics, New York, NY
$^{2}$ Janssen Research & Development, Titusville, NJ
$^{3}$ Department of Biomathematics, University of Califoria, Los Angeles, CA
$^{4}$ Department of Biostatistics, University of Califoria, Los Angeles, CA
$^{5}$ Department of Human Genetics, University of Califoria, Los Angeles, CA
$^{6}$ Department of Biomedical Informatics, Columbia University, New York, NY
$^{7}$ Department of Statistics, Columbia University, New York, NY

Corresponding author: Martijn J. Schuemie, Janssen R&D, 1125 Trenton Harbourton Road, Titusville, NJ, 08560, Phone: +31 631793897, schuemie@ohdsi.org

# Abstract

To do

# Introduction

This is a very important study. Here's a really cool paper @pmid23900808.

# Methods

The study spanned the period from `r studyPeriod$minDate` until `r studyPeriod$minDate`.

## Data source

`r databaseDetails$description`

# Results

```{r, echo = FALSE, fig.width=6, fig.height=7.5, out.width = '50%', fig.align='center'}
drawAttritionDiagram(attrition, targetName, comparatorName)
```
**Figure 1**. Attrition diagram.


**Table 1**. Select population characteristics
```{r, echo = FALSE}
table <- prepareTable1(popCharacteristics)
# Remove 1st header, with add back later with merged columns:
header <- as.character(table[1, ])
header[header == "1"] <- ""
table <- table[-1, ]
# Indentation needs to be made explicit (not by leading spaces):
needIndent <- which(substr(table[, 1], 1, 1) == " ")

kable_styling(add_indent(add_header_above(kable(table, "latex", 
                                                booktabs = TRUE, 
                                                longtable = TRUE,
                                                row.names = FALSE, 
                                                col.names = header,
                                                linesep = "",
                                                align = c("l", "r", "r", "r", "r", "r", "r")), 
                                          c("", "Before stratification" = 3, "After stratification" = 3)),
                         needIndent),
              font_size = 7,
              latex_options = c("HOLD_position", "repeat_header"))

```

**Table 2**. Number of subjects, follow-up time (in days), number of outcome events, and event incidence rate (IR) per 1,000 patient years (PY) in the target and comparator group after stratification or matching, as well as the minimum detectable relative risk (MDRR). Note that the IR does not account for any stratification or matching.
```{r, echo = FALSE}
table <- preparePowerTable(mainResults, analyses)

header <- c("Analysis", "Target", "Comp.", "Target", "Comp.", "Target", "Comp.", "Target", "Comp.", "MDRR")
kable_styling(add_header_above(kable(table, "latex", 
                                                booktabs = TRUE, 
                                                row.names = FALSE, 
                                                col.names = header,
                                                align = c("l", "r", "r", "r", "r", "r", "r", "r", "r", "r")), 
                                          c("", "Subjects" = 2, "PYs" = 2, "Outcomes" = 2, "IR (per 1,000 PY)" = 2, "")),
              font_size = 7,
              latex_options = c("HOLD_position"))

```


**Table 2**. Time (days) at risk distribution expressed as minimum (Min), 10th Percentile (P10), 25th percentile (P25), median, 75th percentile (P75), 90th percentile (P90) and maximum (Max) in the target and comparator cohort after stratification.
```{r, echo = FALSE}
table <- prepareFollowUpDistTable(followUpDist)
kable_styling(kable(table, "latex", 
                    booktabs = TRUE, 
                    longtable = FALSE,
                    row.names = FALSE, 
                    linesep = "",
                    align = c("l", "r", "r", "r", "r", "r", "r", "r")),
              font_size = 8,
              latex_options = c("HOLD_position"))

```

```{r, echo = FALSE, fig.width=5, fig.height=3.5, out.width = '50%', fig.align='center'}
plotPs(ps, targetName, comparatorName)
```
**Figure 2**. Preference score distribution. The preference score is a transformation of the propensity score that adjusts for differences in the sizes of the two treatment groups. A higher overlap indicates subjects in the two groups were more similar in terms of their predicted probability of receiving one treatment over the other.

```{r, echo = FALSE, fig.width=4, fig.height=4, out.width = '50%', fig.align='center', warning=FALSE}
plotCovariateBalanceScatterPlot(balance, beforeLabel = "Before stratification", afterLabel = "After stratification")
```
**Figure 3**. Covariate balance before and after stratification. Each dot represents the standardizes difference of means for a single covariate before and after stratification on the propensity score. 

```{r, echo = FALSE, fig.width=12, fig.height=4, out.width = '100%', fig.align='center', warning=FALSE}
plotScatter(controlResults)
```
**Figure 4**. Systematic error 


**Table 3**. Hazard ratios, 95% confidence intervals, uncalibrated and empirically calibrated, for various analyses.
```{r, echo = FALSE}
table <- prepareMainResultsTable(mainResults, analyses)
kable_styling(kable(table, "latex", 
                    booktabs = TRUE, 
                    longtable = FALSE,
                    row.names = FALSE, 
                    linesep = ""),
              font_size = 8,
              latex_options = c("HOLD_position"))
```

```{r, echo = FALSE, fig.width=7, fig.height=5, out.width = '100%', fig.align='center', results='hide'}
plotKaplanMeier(kaplanMeier, targetName, comparatorName)
```
**Figure 3**. Kaplan Meier plot, showing survival as a function of time. This plot
is adjusted for the propensity score stratification: The target curve (<em>`r targetName`</em>) shows the actual observed survival. The
comparator curve (<em>`r comparatorName`</em>) applies reweighting to approximate the counterfactual of what the target survival
would look like had the target cohort been exposed to the comparator instead. The shaded area denotes
the 95 percent confidence interval.


**Table 4**. Subgroup interactions
```{r, echo = FALSE}
table <- prepareSubgroupTable(subgroupResults)

header <- c("Subgroup", "Target", "Comparator", "HRR (95% CI)", "P" ,"Cal. P", "HRR (95% CI)", "P" ,"Cal. P")
kable_styling(add_header_above(kable(table, "latex", 
                                                booktabs = TRUE, 
                                                row.names = FALSE, 
                                                col.names = header,
                                                align = c("l", "r", "r", "r", "r", "r", "r", "r", "r")), 
                                          c("", "Subjects" = 2, "On-treatment" = 3, "Intent-to-treat" = 3)),
              font_size = 8,
              latex_options = c("HOLD_position"))

```

# Conclusions

# References

```{r, echo=FALSE, message=FALSE, comment=FALSE, warning=FALSE, results='hide'}
if (!useStoredObject) {
  DatabaseConnector::disconnect(connection)
}
```