-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03-Simulation.R
140 lines (110 loc) · 5.45 KB
/
03-Simulation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
library(simsurv)
library(rstanarm)
library(tidyverse)
library(RISCA)
ggplot2::theme_set(tidybayes::theme_tidybayes())
# ---- exponential model ----
# simulate data for an exponential model with a fixed treatment effect
set.seed(1234)
covs <- data.frame(id = 1:300, trt = stats::rbinom(300, 1L, 0.5))
s1 <- simsurv(lambdas = 0.1, betas = c(trt = -0.5),
dist = 'exponential',
x = covs, maxt = 50) %>%
mutate(true_lambda = 0.1, true_beta = -0.5, dist = 'exponential')
sim_exp <- left_join(covs, s1, by = 'id')
# plot K-M curve
sim_km <- prep_km_data(sim_exp, Surv(time = eventtime, event = status) ~ trt)
sim_km %>%
ggplot(., aes(x = time, y = estimate, group = trt, colour = trt)) +
geom_step() +
ggtitle('Simulated survival data (exponential hazard)')
# estimate RMST
sim_rmst <- RISCA::rmst(times = sim_km$time, surv.rates = sim_km$estimate, max.time = 20)
coxph(formula = Surv(time = eventtime, event = status) ~ trt,
data = sim_exp)
# Fit stan_surv to these data
sim_fit <- stan_surv(
formula = Surv(time = eventtime, event = status) ~ trt,
data = sim_exp,
basehaz = 'exp',
prior_intercept = normal(0, 1))
# print summary
sim_fit
# plot treatment effect
tidybayes::gather_draws(sim_fit, trt) %>%
ggplot(., aes(x = .value)) +
stat_dotsinterval() +
geom_vline(data = sim_exp, aes(xintercept = true_beta)) +
scale_y_continuous('density') +
scale_x_continuous('Beta[trt]') +
ggtitle('Posterior Estimates of Treatment Beta fit to Simulated Data') +
labs(caption = glue::glue('Showing posterior draws for a single replicate truncated at 50',
'\nVertical line shows true value used for simulation ({scales::comma(-0.5, accuracy = 0.01)})'))
# ---- test sensitivity to sample size ----
simulate_data <- function(n, balance = 0.5, maxt = 50, seed = 4243) {
set.seed(seed)
covs <- data.frame(id = seq_len(n), trt = stats::rbinom(n, 1L, 0.5))
s1 <- simsurv(lambdas = 0.1, betas = c(trt = -0.5), dist = 'exponential',
x = covs, maxt = maxt) %>%
mutate(true_lambda = 0.1, true_beta = -0.5, dist = 'exponential')
sim_exp <- left_join(covs, s1, by = 'id')
}
scenarios <- expand_grid(
n = seq(from = 50, to = 100, by = 50),
maxt = seq(from = 40, to = 60, by = 10),
seed = seq_len(3))
simulations <- purrr::pmap(scenarios, simulate_data)
all_sims <- bind_rows(simulations, .id = 'scenario')
fits <- simulations %>%
purrr::map(
~ stan_surv(formula = Surv(time = eventtime, event = status) ~ trt,
data = .x, basehaz = 'exp',
prior_intercept = normal(0, 1))
)
estimates <- fits %>%
purrr::map_dfr(tidybayes::gather_draws, trt, .id = 'scenario') %>%
dplyr::mutate(scenario = as.integer(scenario)) %>%
dplyr::left_join(scenarios %>% dplyr::mutate(scenario = row_number()))
ggplot(estimates, aes(x = .value, y = factor(maxt),
group = factor(maxt), colour = factor(maxt))) +
stat_dotsinterval(position = position_dodge(width = 0.5)) +
facet_wrap( ~ stringr::str_c('Sample size = ', n), scale = 'free_y') +
geom_vline(data = all_sims, aes(xintercept = true_beta), colour = 'gray10', linetype = 'dotted') +
scale_y_discrete('Max follow-up time') +
scale_x_continuous('Treatment beta') +
labs(caption = glue::glue('Showing posterior draws for three independent replicates truncated at different timepoints',
'\nVertical line shows true value used for simulation ({scales::comma(unique(all_sims$true_beta), accuracy = 0.01)})')
) +
ggtitle('Simulation study of Posterior Beta')
ggplot(estimates, aes(x = .value, y = factor(maxt),
group = factor(maxt), fill = stat(x < 0))) +
stat_dotsinterval(position = position_dodge(width = 0.5), quantiles = 50) +
facet_wrap( ~ stringr::str_c('Sample size = ', n), scale = 'free_y') +
geom_vline(data = all_sims, aes(xintercept = true_beta), colour = 'gray10', linetype = 'dotted') +
scale_y_discrete('Max follow-up time') +
scale_x_continuous('Treatment beta') +
labs(caption = glue::glue('Showing posterior draws for three independent replicates truncated at different timepoints',
'\nVertical line shows true value used for simulation ({scales::comma(unique(all_sims$true_beta), accuracy = 0.01)})')
) +
ggtitle('Simulation study of Posterior Beta')
ggplot(estimates, aes(y = .value, x = factor(maxt), colour = factor(seed), group = factor(seed))) +
stat_pointinterval(position = position_dodge(width = 0.2), .width = c(.5, .8)) +
facet_wrap( ~ stringr::str_c('Sample size = ', n), scale = 'free_y', ncol = 1) +
geom_hline(data = all_sims, aes(yintercept = true_beta), colour = 'gray10', linetype = 'dotted') +
scale_x_discrete('Max followup time (maxt)') +
scale_y_continuous('Treatment beta') +
labs(caption = glue::glue('Showing posterior median and {glue::glue_collapse(scales::percent(c(0.5, 0.8)), sep = "/")} CrI',
'\nHorizontal line shows true value used for simulation ({scales::comma(unique(all_sims$true_beta), accuracy = 0.01)})')
) +
ggtitle('Summary of Posterior Beta estimate fit to simulated data')
# ---- check performance using loo ----
sim_loo <- loo(sim_fit)
# compare against a model with a different baseline hazard
sim_fit_alt <- stan_surv(
formula = Surv(time = eventtime, event = status) ~ trt,
data = sim_exp,
basehaz = 'ms',
prior_intercept = normal(0, 1))
sim_loo_alt <- loo(sim_fit_alt)
# Compare models
loo_compare(sim_loo, sim_loo_alt)