-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAlcBinge_stapm.R
150 lines (110 loc) · 5.91 KB
/
AlcBinge_stapm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#' Calculate variables to inform alcohol binge model - STAPM version
#'
#' Designed to work with simulated individual trajectories of alcohol consumption - stratified by
#' age category, sex and IMD quintile. Assigns coefficients stratified by age category, sex and IMD quintile to
#' the simulated sample of individuals to estimate their characteristics of single occasion drinking
#' at each time step in the simulation.
#'
#' The coefficients used come originally from a study by Hill-McManus 2014,
#' who analysed drinking occasions using data from detailed diaries in the National Diet and Nutrition Survey 2000/2001.
#' Using the results, it possible to model each individual's expected number of drinking occasions across the year,
#' the average amount they drunk on an occasion, the variability in the amount drunk among occasions,
#' and how these vary socio-demographically.
#'
#' To get these coefficients into a form in which they fit with the age, sex and IMD quintile
#' stratification of the STAPM model required assigning them to a sample of Health Survey for England data (2011-2017)
#' based on a full range of covariates, and then calculating weighted averages by age category, sex and IMD quintile.
#'
#' This function is designed to be applied at each time step during a STAPM model run.
#'
#' @param data Data table of individual characteristics - the variables used are average weekly alcohol consumption,
#' age, sex and IMD quintile.
#' @param params List of four data tables - three containing coefficient estimates from Hill-McManus et al
#' averaged by age category, sex and IMD quintile, and the fourth containing estimates of individual height and weight
#' averaged by age category, sex and IMD quintile.
#'
#' @return Returns data plus the estimated variables.
#' @importFrom data.table := setDT setnames
#' @export
#'
#'
#' @examples
#'
#'\dontrun{
#'
#' # Simulate individual data
#'
#' # Using the parameters for the Gamma distribution from Kehoe et al. 2012
#' n <- 1e3
#' grams_ethanol_day <- rgamma(n, shape = 0.69, scale = 19.03)
#'
#' data <- data.table(
#' weekmean = grams_ethanol_day * 7 / 8,
#' age = rpois(n, 30),
#' sex = sample(x = c("Male", "Female"), size = n, replace = T),
#' imd_quintile = "5_most_deprived"
#' )
#'
#' test_data <- AlcBinge_stapm(data)
#'}
#'
AlcBinge_stapm <- function(
data,
params = tobalcepi::binge_params_stapm
) {
# Create age category variable -
# to match the age categories used to summarise the coefficients from Hill-McManus et al
data[ , age_cat :=
c("11-15", "16-17", "18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75-89")[
findInterval(age, c(-1, 16, 18, 25, 35, 45, 55, 65, 75))]]
# Calculate the expected number of weekly drinking occasions
data <- merge(data, params[[1]],
by = c("age_cat", "sex", "imd_quintile"),
all.x = T, all.y = F, sort = F)
data[ , drink_freq := exp(mean_consump_coef * log(weekmean) + age_coef + imd_coef + const_coef)]
data[ , `:=`(mean_consump_coef = NULL, age_coef = NULL, imd_coef = NULL, const_coef = NULL)]
data[weekmean == 0, drink_freq := 0]
################
# Calculate expected standard deviation of a drinking occasions
# Step one: calculate probability of having 3 or more drinking occasions in a week
data <- merge(data, params[[2]],
by = c("age_cat", "sex", "imd_quintile"),
all.x = T, all.y = F, sort = F)
data[ , drink_3_or_more := VGAM::probitlink(mean_consump_coef * log(weekmean) + age_coef + imd_coef + const_coef, inverse = T)]
data[ , `:=`(mean_consump_coef = NULL, age_coef = NULL, imd_coef = NULL, const_coef = NULL)]
data[weekmean == 0, drink_3_or_more := 0]
# Step 2: calculate inverse mills ratio
# Formula taken from Hill-McManus 2014
# standard normal density function / (1 - standard normal cumulative distribution function)
data[ , imr := stats::dnorm(drink_3_or_more) / (1 - stats::pnorm(drink_3_or_more))]
# Step 3 : calculate the predicted occasion level standard deviation
# (variation in the quantity consumed in a drinking occasion)
data <- merge(data, params[[3]],
by = c("age_cat", "sex", "imd_quintile"),
all.x = T, all.y = F, sort = F)
data[ , occ_sd := exp(mean_consump_coef * log(weekmean) + imd_coef + imr_coef * imr) / 8]
# The paper appears to say it is linear regression, but after confirming with Dan, the y (i.e., standard deviation)
# is acutally logged. The paper also not clear regarding measurements.
# But it turned to be units for all weekly consumption (independent variable) and gram for standard deviation of the model.
# hence divided by 8.
data[ , `:=`(mean_consump_coef = NULL, imd_coef = NULL, imr_coef = NULL, imr = NULL, drink_3_or_more = NULL)]
# calculate the average quantity of alcohol consumed during a drinking occasion,
# obtained using the mean weekly consumption divided
# by the predicted number of weekly drinking occasions.
data[ , mean_sod := weekmean / drink_freq]
data[weekmean == 0, mean_sod := 0]
# Calculate the Wildemark r value for each individual using their weight and height from the HSE
# described in Watson 1981
data <- merge(data, params[[4]],
by = c("age_cat", "sex", "imd_quintile"),
all.x = T, all.y = F, sort = F)
# Note that the numbers in the formulae below come from "The Estimation of Blood Alcohol Concentration - Widmark Revisited" - Posey and Mozayani
# height must be in meters and weight in kg
if(max(data$height, na.rm = T) > 100) {
warning("AlcBinge: height in cm not m - correct the input data")
}
data[sex == "Male", rwatson := 0.39834 + ((12.725 * height - 0.11275 * age + 2.8993) / weight)]
data[sex == "Female", rwatson := 0.29218 + ((12.666 * height - 2.4846) / weight)]
data[ , `:=`(age_cat = NULL, height = NULL)]
return(data)
}