From 02f23f41a8fb86ad305dd8bf669dab7af1fe776f Mon Sep 17 00:00:00 2001
From: Duncan Gillespie <duncan.gillespie@sheffield.ac.uk>
Date: Sun, 18 Dec 2022 19:22:13 +0000
Subject: [PATCH] add stapm version of alcohol binge parameters for Scotland

---
 .gitignore                                    |   3 +
 R/binge_params.R                              |  25 ++-
 _pkgdown.yml                                  |   1 +
 data-raw/binge_params/20_working_code.R       |  41 ----
 .../10_HSE_variable_processing.R              |   0
 .../{ => England}/12_Imputation.R             |   0
 .../{ => England}/binge_params_for_stapm.R    |   0
 data-raw/binge_params/README                  |  21 ++
 .../binge_params/Scotland/10_clean_shes.R     | 142 +++++++++++++
 .../Scotland/15_Imputation_shes.R             |  99 ++++++++++
 .../Scotland/binge_params_for_stapm.R         | 186 ++++++++++++++++++
 data/binge_params_stapm_scot.rda              | Bin 0 -> 4730 bytes
 man/binge_params.Rd                           |   4 +-
 man/binge_params_stapm.Rd                     |   2 +-
 man/binge_params_stapm_scot.Rd                |  22 +++
 15 files changed, 492 insertions(+), 54 deletions(-)
 delete mode 100644 data-raw/binge_params/20_working_code.R
 rename data-raw/binge_params/{ => England}/10_HSE_variable_processing.R (100%)
 rename data-raw/binge_params/{ => England}/12_Imputation.R (100%)
 rename data-raw/binge_params/{ => England}/binge_params_for_stapm.R (100%)
 create mode 100644 data-raw/binge_params/README
 create mode 100644 data-raw/binge_params/Scotland/10_clean_shes.R
 create mode 100644 data-raw/binge_params/Scotland/15_Imputation_shes.R
 create mode 100644 data-raw/binge_params/Scotland/binge_params_for_stapm.R
 create mode 100644 data/binge_params_stapm_scot.rda
 create mode 100644 man/binge_params_stapm_scot.Rd

diff --git a/.gitignore b/.gitignore
index 32143f9..8655be4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ dev/
 .DS_Store
 *.pdf
 data-raw/binge_params/*.csv
+.rds
+data-raw/binge_params/Scotland/*.rds
+
diff --git a/R/binge_params.R b/R/binge_params.R
index 875a740..8d9817c 100644
--- a/R/binge_params.R
+++ b/R/binge_params.R
@@ -1,5 +1,5 @@
 
-#' Parameters to estimate amount drunk on single occasions - STAPM version
+#' Parameters to estimate amount drunk on single occasions - England - STAPM version
 #' 
 #' As our starting point we use the parameter estimates from Hill-McManus et al 2014 - stored within the `tobalcepi` package as the data object `binge_params`. 
 #' The problem with using these parameters directly in STAPM is that STAPM does not model the individual life-course trajectories of 
@@ -17,14 +17,25 @@
 #'
 #' @source Hill-McManus et al 2014. "Estimation of usual occasion-based individual drinking patterns using diary survey data". https://doi.org/https://doi.org/10.1016/j.drugalcdep.2013.09.022. 
 #'
+"binge_params_stapm"
+
+#' Parameters to estimate amount drunk on single occasions - Scotland - STAPM version
+#' 
+#' Scottish version of `binge_params_stapm` based on the Scottish Health Survey years 2008 to 2019.   
 #'
+#' @docType data
 #'
+#' @format A list of four data tables (1 = Negative binomial regression model for the number of weekly drinking occasions, 
+#' 2 = Fitted Heckman selection model for probability that an individual drinks on at least 3 separate occasions during the diary period, 
+#' 3 = Fitted Heckman outcome regression results for the standard deviation in the quantity of alcohol consumed in a drinking occasion, 
+#' 4 = average height and weight)
 #'
+#' @source Hill-McManus et al 2014. "Estimation of usual occasion-based individual drinking patterns using diary survey data". https://doi.org/https://doi.org/10.1016/j.drugalcdep.2013.09.022. 
 #'
-"binge_params_stapm"
+"binge_params_stapm_scot"
 
 
-#' Parameters to estimate amount drunk on single occassions
+#' Parameters to estimate amount drunk on single occasions
 #' 
 #' We use parameter estimates from Hill-McManus et al 2014 -   
 #' 
@@ -35,9 +46,7 @@
 #' Table 6 - Fitted Heckman outcome regression results for the standard deviation in the quantity of alcohol consumed in a drinking occasion   
 #' 
 #' We do not use parameter estimates from Table 4 - Fitted Tobit regression model for the mean grams of alcohol consumed during a drinking occasion. 
-#' This is because we calculate from the data by dividing the weekly mean alcohol consumption by the estimated number of weekly drinking occassions.  
-#'
-#'
+#' This is because we calculate from the data by dividing the weekly mean alcohol consumption by the estimated number of weekly drinking occasions.  
 #'
 #' @docType data
 #'
@@ -45,10 +54,6 @@
 #'
 #' @source Hill-McManus et al 2014. "Estimation of usual occasion-based individual drinking patterns using diary survey data". https://doi.org/https://doi.org/10.1016/j.drugalcdep.2013.09.022. 
 #'
-#'
-#'
-#'
-#'
 "binge_params"
 
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 9e2d7f3..fd27166 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -80,6 +80,7 @@ reference:
   - WArisk_acute
   - binge_params
   - binge_params_stapm
+  - binge_params_stapm_scot
 - subtitle: "Alcohol lag times"
   contents:
   - AlcLags
diff --git a/data-raw/binge_params/20_working_code.R b/data-raw/binge_params/20_working_code.R
deleted file mode 100644
index cfaaeb2..0000000
--- a/data-raw/binge_params/20_working_code.R
+++ /dev/null
@@ -1,41 +0,0 @@
-
-# The aim of this code is to develop and test the method to 
-# model acute harms that are partially attributable to alcohol in STAPM
-
-library(data.table)
-library(tobalcepi)
-
-# Read the imputed HSE data sample
-data <- fread("data/HSE_2011_to_2017_imputed.csv")
-
-setnames(data, c("ethnicity_2cat", "height", "weight"), c("ethnic2cat", "htval", "wtval"))
-
-# Test the existing AlcBinge function on these data
-# This is the SAPM method
-data_sapm_test <- tobalcepi::AlcBinge(data)
-
-
-# adapt the method to predict the number of drinking occassions 
-# and the distribution of amount consumed 
-# as a function of the average weekly amount of alcohol consumed, age, sex and IMD quintile. 
-
-# assign the parameter values from Hill-McManus et al. to each individual 
-# and then average these values by age, sex and IMD quintile 
-
-# the IMD quintile averages will then reflect the variation among IMD quintiles 
-# in the distribution of ethnicity, income, number of children etc.
-
-# a copy of the AlcBinge function in tobalcepi has been moved to src for development
-# function renamed AlcBinge_dev
-
-data_sapm_test <- AlcBinge_dev(data)
-
-
-
-
-
-
-
-
-
-
diff --git a/data-raw/binge_params/10_HSE_variable_processing.R b/data-raw/binge_params/England/10_HSE_variable_processing.R
similarity index 100%
rename from data-raw/binge_params/10_HSE_variable_processing.R
rename to data-raw/binge_params/England/10_HSE_variable_processing.R
diff --git a/data-raw/binge_params/12_Imputation.R b/data-raw/binge_params/England/12_Imputation.R
similarity index 100%
rename from data-raw/binge_params/12_Imputation.R
rename to data-raw/binge_params/England/12_Imputation.R
diff --git a/data-raw/binge_params/binge_params_for_stapm.R b/data-raw/binge_params/England/binge_params_for_stapm.R
similarity index 100%
rename from data-raw/binge_params/binge_params_for_stapm.R
rename to data-raw/binge_params/England/binge_params_for_stapm.R
diff --git a/data-raw/binge_params/README b/data-raw/binge_params/README
new file mode 100644
index 0000000..9c2ec7e
--- /dev/null
+++ b/data-raw/binge_params/README
@@ -0,0 +1,21 @@
+
+The aim of this code is to process the parameters that allow the modelling of 
+acute harms that are partially attributable to alcohol in STAPM
+
+# This is the SAPM method
+data_sapm_test <- tobalcepi::AlcBinge(data)
+
+adapt the method to predict the number of drinking occassions 
+and the distribution of amount consumed 
+as a function of the average weekly amount of alcohol consumed, age, sex and IMD quintile. 
+
+assign the parameter values from Hill-McManus et al. to each individual 
+and then average these values by age, sex and IMD quintile 
+
+the IMD quintile averages will then reflect the variation among IMD quintiles 
+in the distribution of ethnicity, income, number of children etc.
+
+
+
+
+
diff --git a/data-raw/binge_params/Scotland/10_clean_shes.R b/data-raw/binge_params/Scotland/10_clean_shes.R
new file mode 100644
index 0000000..e36f596
--- /dev/null
+++ b/data-raw/binge_params/Scotland/10_clean_shes.R
@@ -0,0 +1,142 @@
+
+# The aim of this code is to clean the tobacco and alcohol data from the Scottish Health Survey
+
+# note: no questions asked to < 16 year olds in Shes
+
+# Using functions in the hseclean package
+library(hseclean)
+library(data.table)
+library(magrittr)
+
+# Location of Scottish data
+root_dir <- "X:/HAR_PR/PR/Consumption_TA/HSE/Scottish Health Survey (SHeS)/"
+
+# The variables to retain
+keep_vars = c(
+  "hse_id", "wt_int", "psu", "cluster", "year", "quarter",
+  "age", "age_cat", "sex", "imd_quintile",
+  "ethnicity_2cat",
+  "degree", "marstat", "relationship_status", "employ2cat", "social_grade", "kids", "income5cat",
+  "nssec3_lab", "man_nonman", "activity_lstweek", "eduend4cat",
+
+  "hse_mental",
+  
+  "weight", "height",
+
+  "drinks_now",
+  "drink_freq_7d", "n_days_drink", "peakday", "binge_cat",
+  "beer_units", "wine_units", "spirit_units", "rtd_units",
+  "weekmean", "drating", "dnoft", "dnnow", "dnany", "dnevr",
+  "perc_spirit_units", "perc_wine_units", "perc_rtd_units", "perc_beer_units",
+  "drinker_cat",
+  #"spirits_pref_cat", "wine_pref_cat", "rtd_pref_cat", "beer_pref_cat",
+  "total_units7_ch",
+  
+  # Smoking
+  "cig_smoker_status",
+  "years_since_quit", "years_reg_smoker", "cig_ever",
+  "smk_start_age", "smk_stop_age", "censor_age", 
+  "cigs_per_day", "smoker_cat", "hand_rolled_per_day", "machine_rolled_per_day", "prop_handrolled", "cig_type")
+
+
+# Main processing
+
+cleandata <- function(data) {
+
+  data <- clean_age(data)
+  data <- clean_demographic(data)
+  data <- clean_education(data)
+  data <- clean_economic_status(data)
+  data <- clean_family(data)
+  data <- clean_income(data)
+  data <- clean_health_and_bio(data)
+
+  data <- alc_drink_now_allages(data)
+  data <- alc_weekmean_adult(data)
+  data <- alc_sevenday_adult(data)
+  
+  data <- smk_status(data)
+  data <- smk_former(data)
+  data <- smk_life_history(data)
+  data <- smk_amount(data)
+  
+  data <- select_data(
+    data,
+    ages = 16:89,
+    years = 2008:2019,
+    keep_vars = keep_vars,
+    complete_vars = c("age", "sex", "imd_quintile", "psu", "cluster", "year")
+  )
+
+  return(data)
+}
+
+shes_data <- combine_years(list(
+  cleandata(read_SHeS_2008(root = root_dir)),
+  cleandata(read_SHeS_2009(root = root_dir)),
+  cleandata(read_SHeS_2010(root = root_dir)),
+  cleandata(read_SHeS_2011(root = root_dir)),
+  cleandata(read_SHeS_2012(root = root_dir)),
+  cleandata(read_SHeS_2013(root = root_dir)),
+  cleandata(read_SHeS_2014(root = root_dir)),
+  cleandata(read_SHeS_2015(root = root_dir)),
+  cleandata(read_SHeS_2016(root = root_dir)),
+  cleandata(read_SHeS_2017(root = root_dir)),
+  cleandata(read_SHeS_2018(root = root_dir)),
+  cleandata(read_SHeS_2019(root = root_dir))
+))
+
+# Load population data for Scotland
+# from here - X:\ScHARR\PR_Mortality_data_TA\data\Processed pop sizes and death rates from VM
+
+scot_pops <- fread("X:/ScHARR/PR_Mortality_data_TA/data/Processed pop sizes and death rates from VM/pop_sizes_scotland_national_v1_2022-12-13_mort.tools_1.5.0.csv")
+setnames(scot_pops, c("pops"), c("N"))
+
+# adjust the survey weights according to the ratio of the real population to the sampled population
+shes_data <- clean_surveyweights(shes_data, pop_data = scot_pops)
+
+# remake age categories
+shes_data[, age_cat := c("16-17",
+                         "18-24",
+                         "25-34",
+                         "35-44",
+                         "45-54",
+                         "55-64",
+                         "65-74",
+                         "75-89")[findInterval(age, c(-1, 18, 25, 35, 45, 55, 65, 75, 1000))]]
+
+setnames(shes_data,
+         c("smk_start_age", "cig_smoker_status", "years_since_quit"),
+         c("start_age", "smk.state", "time_since_quit"))
+
+# Checks on data
+
+shes_data[(spirit_units + wine_units + rtd_units + beer_units) != weekmean]
+
+shes_data[drinker_cat != "abstainer" & weekmean == 0]
+
+# some drinkers have no data for average weekly consumption
+# remove these individuals from the dataset - rather than imputing the missing data
+shes_data <- shes_data[!(drinker_cat != "abstainer" & weekmean == 0)]
+
+shes_data[drinker_cat != "abstainer" & is.na(weekmean)]
+
+# select only rows with complete information on average weekly alcohol consumption
+shes_data <- shes_data[!is.na(weekmean)]
+
+shes_data[smoker_cat != "non_smoker" & cigs_per_day == 0]
+
+# select only rows with complete information on average weekly alcohol consumption
+shes_data <- shes_data[!is.na(smk.state)]
+
+nrow(shes_data)
+
+#shes_data[is.na(drinker_cat)]
+
+######## Write the data
+
+# note the package version so that the data can be tagged with it
+ver <- packageVersion("hseclean")
+
+saveRDS(shes_data, paste0("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_", Sys.Date(), "_hseclean_", ver, ".rds"))
+
diff --git a/data-raw/binge_params/Scotland/15_Imputation_shes.R b/data-raw/binge_params/Scotland/15_Imputation_shes.R
new file mode 100644
index 0000000..1c8ca82
--- /dev/null
+++ b/data-raw/binge_params/Scotland/15_Imputation_shes.R
@@ -0,0 +1,99 @@
+
+# This code reads the processed health survey datasets for each year
+# and conducts imputation.
+
+# all the variables to be imputed are categorical
+
+library(data.table)
+library(hseclean)
+
+# Load the data
+
+# choose the file output by 10_clean_shes.R
+
+data <- readRDS("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_2022-12-18_hseclean_1.9.2.rds")
+
+# sapply(data, class)
+
+# view variables with missingness
+misscheck <- function(var) {
+  x <- table(var, useNA = "ifany")
+  na <- x[which(is.na(names(x)))]
+  if(length(na) == 0) na <- 0
+  perc <- round(100 * na / sum(x), 2)
+  #return(c(paste0(na, " missing obs, ", perc, "%")))
+  return(na)
+}
+
+n_missing <- sapply(data, misscheck)
+missing_vars <- n_missing[which(n_missing > 0)]
+missing_vars
+
+# household equivalised income has the most missingness
+
+# The categorical variables involved
+var_names <- c(
+  "kids",
+  "relationship_status",
+  "ethnicity_2cat",
+  "eduend4cat",
+  "degree",
+  "nssec3_lab",
+  "employ2cat",# complete variable
+  "activity_lstweek",# complete variable
+  "income5cat",
+  "hse_mental",
+  "drinker_cat",
+  "social_grade",
+  "smk.state"
+)
+
+# Note that the imputation wont work unless the variables considered are
+# either subject to imputation or do not contain any missingness (i.e. are complete)
+
+# The variables to be imputed and the method to be used
+var_methods <- rep("", ncol(data))
+
+var_methods[which(var_names == "kids")] <- "polr"
+var_methods[which(var_names == "relationship_status")] <- "polyreg"
+var_methods[which(var_names == "ethnicity_2cat")] <- "logreg"
+var_methods[which(var_names == "eduend4cat")] <- "polyreg"
+var_methods[which(var_names == "degree")] <- "logreg"
+var_methods[which(var_names == "nssec3_lab")] <- "polyreg"
+var_methods[which(var_names == "income5cat")] <- "polr"
+var_methods[which(var_names == "hse_mental")] <- "logreg"
+var_methods[which(var_names == "social_grade")] <- "polyreg"
+
+
+# Set order of factors where needed for imputing as ordered.
+data[ , kids := factor(kids, levels = c("0", "1", "2", "3+"))]
+data[ , income5cat := factor(income5cat, levels = c("1_lowest_income", "2", "3", "4", "5_highest_income"))]
+
+
+# Impute missing values
+
+# Run the imputation
+imp <- impute_data_mice(
+  data = data,
+  var_names = var_names,
+  var_methods = var_methods,
+  n_imputations = 5
+  # for testing just do 1 imputation
+  # but test with more later
+  # for point estimates, apparently 2-10 imputations are enough
+)
+
+data_imp <- copy(imp$data)
+
+
+# note the package version so that the data can be tagged with it
+ver <- packageVersion("hseclean")
+
+saveRDS(data_imp, paste0("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_", Sys.Date(), "_hseclean_", ver, "_imputed.rds"))
+
+
+
+
+
+
+
diff --git a/data-raw/binge_params/Scotland/binge_params_for_stapm.R b/data-raw/binge_params/Scotland/binge_params_for_stapm.R
new file mode 100644
index 0000000..d40279b
--- /dev/null
+++ b/data-raw/binge_params/Scotland/binge_params_for_stapm.R
@@ -0,0 +1,186 @@
+
+# The aim of this code is to calculate weighted averages of the 
+# parameters from Hill-McManus et al 2014
+# to produce parameters for use in STAPM 
+# stratified by age, sex and IMD quintile
+
+# Base this on the Scottish Health Survey 2008 to 2019
+
+# do not save the intermediate datasets as don't want these uploaded to gitlab
+# (but for development purposes the intermediate datasets were saved locally)
+
+library(data.table)
+#library(tobalcepi)
+
+set.seed(1)
+
+# Read the imputed survey data sample
+data <- readRDS("data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_2022-12-18_hseclean_1.9.2_imputed.rds")
+
+# Test the existing AlcBinge function on these data
+# This is the SAPM method
+#data <- tobalcepi::AlcBinge(data)
+
+# Replicate some of the code from the AlcBinge functions
+# with the aim of assigning the relevant parameters to the data before averaging them
+
+# add temporary age category
+data[, age_temp := c(
+  "<16", "16-17", "18-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
+  "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+")[findInterval(age, c(-1, 16, 18, seq(20, 90, 5)))]]
+
+# coefficients based on 2014 Hill-McManus et al
+
+# run prep_binge_params.R
+
+# negative binomial regression model for the number of weekly drinking occasions - Table 3
+freq_model_coef <- binge_params[[1]]$coefficient
+
+# fitted Heckman selection model for probability that
+# an individual drinks on at least 3 separate occasions during the diary period - Table 5
+select_model_coef <- binge_params[[2]]$coefficient
+
+# fitted Heckman outcome regression results for the standard deviation
+# in the quantity of alcohol consumed in a drinking occasion - Table 6
+sdv_model_coef <- binge_params[[3]]$coefficient
+
+
+################################
+################################
+
+# calculate expected number of weekly drinking occasions, using freq_model_coef
+# This just creates a new column for each variable,
+# and allocates the individual a coefficient based on their characteristics.
+
+data[ , mean_consump_coef := freq_model_coef[1]]
+
+data[ , age_coef := 0]
+data[age_temp %in% c("25-29", "30-34"), age_coef := freq_model_coef[2]]
+data[age_temp %in% c("35-39", "40-44"), age_coef := freq_model_coef[3]]
+data[age_temp %in% c("45-49", "50-54"), age_coef := freq_model_coef[4]]
+
+# model applied to population below 65 years, but assume effect at 55-65 applies at older ages too
+data[age_temp %in% c("55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+"),
+     age_coef := freq_model_coef[5]]
+
+data[ , income_coef := 0]
+data[income5cat == "1_lowest_income", income_coef := freq_model_coef[6]]
+
+data[ , ethn_coef := 0]
+data[ethnicity_2cat == "non_white", ethn_coef := freq_model_coef[7]]
+
+data[ , leaveed_coef := 0]
+data[eduend4cat == "never_went_to_school", leaveed_coef := freq_model_coef[8]]
+data[eduend4cat == "15_or_under", leaveed_coef := freq_model_coef[9]]
+data[eduend4cat == "16-18", leaveed_coef := freq_model_coef[10]]
+
+data[ , child_coef := 0]
+data[kids == "1", child_coef := freq_model_coef[11]]
+data[kids == "2", child_coef := freq_model_coef[12]]
+data[kids == "3+", child_coef := freq_model_coef[13]]
+
+data[ , class_coef := 0]
+data[social_grade == "C2DE", class_coef := freq_model_coef[14]]
+
+data[ , const_coef := freq_model_coef[15]]
+
+# Calculate the weighted average of parameters by age, sex and IMD quintile
+freq_model_coef_av <- data[ , .(
+  mean_consump_coef = sum(mean_consump_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  age_coef = sum(age_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  imd_coef = sum((income_coef + ethn_coef + leaveed_coef + child_coef + class_coef) * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  const_coef = sum(const_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T) 
+), by = c("age_cat", "sex", "imd_quintile")]
+
+data[ , `:=`(mean_consump_coef = NULL, age_coef = NULL, income_coef = NULL, ethn_coef = NULL,
+             leaveed_coef = NULL, child_coef = NULL, class_coef = NULL, const_coef = NULL)]
+
+################################
+################################
+
+# expected standard deviation of drinking occasions
+
+data[ , mean_consump_coef := select_model_coef[1]]
+
+data[ , age_coef := 0]
+data[age_temp %in% c("25-29", "30-34"), age_coef := select_model_coef[2]]
+data[age_temp %in% c("35-39", "40-44"), age_coef := select_model_coef[3]]
+data[age_temp %in% c("45-49", "50-54"), age_coef := select_model_coef[4]]
+data[age_temp %in% c("55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+"),
+     age_coef := select_model_coef[5]]
+
+data[ , employ_coef := 0]
+data[employ2cat == "unemployed", employ_coef := select_model_coef[6]]
+
+data[ , income_coef := 0]
+data[income5cat == "1_lowest_income", income_coef := select_model_coef[7]]
+
+data[ , ethn_coef := 0]
+data[ethnicity_2cat == "non_white", ethn_coef := select_model_coef[8]]
+
+data[ , leaveed_coef := 0]
+data[eduend4cat == "never_went_to_school", leaveed_coef := select_model_coef[9]]
+data[eduend4cat == "15_or_under", leaveed_coef := select_model_coef[10]]
+data[eduend4cat == "16-18", leaveed_coef := select_model_coef[11]]
+
+data[ , child_coef := 0]
+data[kids == "1", child_coef := select_model_coef[12]]
+data[kids == "2", child_coef := select_model_coef[13]]
+data[kids == "3+", child_coef := select_model_coef[14]]
+
+data[ , class_coef := 0]
+data[social_grade == "C2DE", class_coef := select_model_coef[15]]
+
+data[ , const_coef := select_model_coef[16]]
+
+# Calculate the weighted average of parameters by age, sex and IMD quintile
+select_model_coef_av <- data[ , .(
+  mean_consump_coef = sum(mean_consump_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  age_coef = sum(age_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  imd_coef = sum((employ_coef + income_coef + ethn_coef + leaveed_coef + child_coef + class_coef) * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  const_coef = sum(const_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T) 
+), by = c("age_cat", "sex", "imd_quintile")]
+
+data[ , `:=`(mean_consump_coef = NULL, age_coef = NULL, employ_coef = NULL, income_coef = NULL,
+             ethn_coef = NULL, leaveed_coef = NULL, child_coef = NULL, class_coef = NULL, const_coef = NULL)]
+
+################################
+################################
+
+# occasion level standard deviation of the quantity consumed in a drinking occasion
+
+data[ , mean_consump_coef := sdv_model_coef[1]]
+
+data[ , income_coef := 0]
+data[income5cat == "1_lowest_income", income_coef := sdv_model_coef[2]]
+
+data[ , imr_coef := sdv_model_coef[3]]
+
+# Calculate the weighted average of parameters by age, sex and IMD quintile
+sdv_model_coef_av <- data[ , .(
+  mean_consump_coef = sum(mean_consump_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  imd_coef = sum(income_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  imr_coef = sum(imr_coef * wt_int, na.rm = T) / sum(wt_int, na.rm = T) 
+), by = c("age_cat", "sex", "imd_quintile")]
+
+data[ , `:=`(mean_consump_coef = NULL, income_coef = NULL, imr_coef = NULL)]
+
+################################
+################################
+
+# average of height and weight
+height_weight_av <- data[ , .(
+  height = sum(height * wt_int, na.rm = T) / sum(wt_int, na.rm = T),
+  weight = sum(weight * wt_int, na.rm = T) / sum(wt_int, na.rm = T)
+), by = c("age_cat", "sex", "imd_quintile")]
+
+################################
+################################
+
+# put the data into a list
+binge_params_stapm_scot <- list(freq_model_coef_av, select_model_coef_av, sdv_model_coef_av, height_weight_av)
+
+# Save the result to the package data folder
+usethis::use_data(binge_params_stapm_scot, overwrite = T)
+
+
diff --git a/data/binge_params_stapm_scot.rda b/data/binge_params_stapm_scot.rda
new file mode 100644
index 0000000000000000000000000000000000000000..b5b0ccda3ec6132d51b83dcedda2702f5b537cc0
GIT binary patch
literal 4730
zcmV-=5{2zTT4*^jL0KkKSqaija{wCQfB*mg|NsC0|NsC0|NsC0|NsC0|NsC0|NsC0
z|NsC0|Nr0(zkLN#_Pq4!0`L>BH#5Ki^j{xF_gw9Bruz4nxnFnQ_qX2>DXEE*LrLh2
z(Ha^uVFN}(CYm%E2dHVLj7Eb&jZGL4pbZ*rL7>Doqh&PFriPfAF*Gqeo}o1LG|`O>
z5Xq)!)byD>OqzO4l=Te=l#Mh~^Z?OKgvC76o=NI?P3Tj~dYep4o~h+L(mg5SV5fm5
z(WXyR357f+PfVotnx25B^eN`4>TRU-O`}N3sgq$RqY`?a0XBl33V9RCdY%P64W%}j
zAk;~u)Y(r$Xw%YY20+adO);d+BN6DBn1`VlPt^}ewHjn(3`RyKnFfH+&}0lICXEA4
z82|yJMvQ|XJrh9CXvAm(L8O|QQzxmXqf<?(qfCq=(FTBM42=V1fM^ELfN7IMKze{Q
z0LTMF(9{5E$)U7Hni?8pGH3>xG62wGG&BJ903<3(o@l3{8fa+H219BA>I^}k(d9G;
zsh|d$GHB2TsPzLznh#I_0MPXf9;Va)41fRw$^g(Fpfq6vMw%LWfY2(Eh{#MdRQ*Ks
z@>BFoXiU>nN2NTcspOxO(=|pk(tf6+YGxG8p(ZD$iRyVq)ICpA(^JxUMvWeuN<0%J
z+JiKkQRy>Lqsj(o^%^5hGGVFY(HeR&N2aDH>Y8~+!%c-S5mr;x6Gd$-z(QpjVgeH;
zCISRP1VD>D0>SimTq6H2XPI?l)K&lmWn$1a*(O&jtF6rWNf86+PmaR~QD7eIi9{?F
zB^gORChAHwL=j;VT)d`<u`dFnH%Uc_@NMVB_@|zp2vWqM0uqTL0uq!&E5P_bbKp)B
z!3-Ia5a>vRMeKq`6-GosMoBqbz8A$&6XZ@RjX0(PsECWvK5`{O0;Pb2mkPlEuZbj*
zNug?NKnpj!>=~{{Td0CP4=-`TqFih_7;8y+``zECN{D}10Uj6a{&n`gJvH(L6(lhf
zPg^@kNi4_$7=rKjuf}8QgY?4H<S8r8E(Hw<AT#%sv#5a#{uhV7!qKDF?B+lrrkC{R
z`+2_9k<5M&hPrA5H_~_8%$G~Vww#VEh2I1Jb6z$Jp!4kf#5cWFIUIW%EWIR>NhMc}
z&tz?^t7>z;Ev~(W?6+Yhi>3<e4_!tQzkb_uJuW8w*xox$*QohKl}c~`00<RC&RxI?
z+t-eju$r%DwrnvF9vQd2t_3BF^ovh!w|UXj!YQ`Zdz|HeU6JegG{9>3y4rlMHs@<=
zN+KOUqqfJP)@}7Zp<a`>HS*EB>uJvdQ@7tb_Hzewjtp{mmqc-ZTH@Z>aNM^z^iI>-
z>lxI?WiD3Cc2o6K*b6`boiV+to~V;1>UFq43Mv==30A+;-1uO8ytP(~yb-;6W<IK>
zlEsgizO}cXW=9ssg)?>O-T(j)L7;MM!G|=$M6GnfBKGX%&N@<$Yjh-o*STNQWm2l5
zM^b8L`v)7Ou%3y5XVqnyZr;D8-{EfW)IZF6^ar6XfN%f-28TquEmFZH57W_@#o@7B
zDkojPKG<^CA94M+gfVpss`iZAq_m7RUH||fh_fD#fWprs`FAE3EPy~TKp)}cQjms^
zywFs?v9D{v(shScHB$GlV1^PkphaI+_U8lQ!L;}Q0Sw2HhP>sn{ttyWQPt$RpM9<R
zf*K(WSVTp(mvM7%XP3^HPg~1~GysHc^`2}KFaT}1)BVU>KQh1a6iMI!0352crh|49
zG%RM|Vc18ARk>TFH9{G22?%o@FWvJ1yZ^L)7#fbBtH{%cajFC~n2)1*x#u+eYKovZ
zxeAdAfexFt?ykR1n=#aOm~jg+4zNPBLdZj4A?|?k@&6LELbO7s*Mwkqmijy;Ww&84
zLbO7(LbO7(LbOABRv}g)KtyA<wVUuaQ!<v-o;NQ|s=CpLYA6U$5S)gC?{swEAI07N
zUDmeW&7dJ`Kth0q=I<Y%@2LbQh<kcGwp#N`l(EkUe9CM_XZazu^nRC}ah1ntxvsZ8
z?nk%qS=an}9_2qHfr`s@V{?*elJ5FkcV51Qfqy^<LOOoCH_}m-)`Y8^>@LJZ*-f+R
zzx{sRb1bqgB!<HN__`p1hh5-$;BYv>ybG#j3>!mghT3ch(u9O#9Asm1L9s`Tv=-jZ
z&j#BTU7|H45{^ics-g%RY|I78P~q$ame-?DucrazLNq4oTuo!K)(KVp3w&w2R!IpQ
zk`hGP7&#!?jS;BJ(<Kf@qmD7$<cH`K=q6zv(@XMnn20lUNh>y~xX~m^3`J!TP|+A)
ztuMH)|8uIq>GpP6?PC$H*lHMED`do)<?EWe%3r=^I3>C~mWR@!=nt=1-?%Zv!q+nB
zb#GO}n*SoyK<0JEBaGn~q!ffmBF>A<@${R|H=tuu)_ZvE9f>88MULc8E?+6$C~hrr
zPNc~=_if)h6|A(zHV$$5?3t>FM5A68PNC_iZUP$GrIna%QHgkt^>2seXt=FY@cdWQ
zD;lHU5;egg!5$BE<FaYItwCRjKwfsD)VYd!Ja}x%@(aDZ;@ITJGmX@B*cTXzPzWc6
zHjEGelc&|d002O_Ebq<rS4km}Qw_;r#n#2<tSS?<teQZYK9~Rj2G2oNbfjpGX6-dJ
zTg5AFjh=|9xV6D#E~^0oiFGx}XmQ;Lt8Ij600IgOMm^Q)PsRuyO>1$zEiJ4X|4;du
z73rRy7kOcSn=6++ys~FHLlJ->5(u$`C|y$#AOb(K0vS;TGlM#iMprE}_^5SkyLT11
zCqg`I6)FpNdu5LC7Jd8eS9&=-og`QLGm=ij<d>J;=CV{Ol!tEwAhGVLAAW9Q?7Iqo
z-V&p`^5AGg&RZBt|A~_<)`zaprVvq-J``vBj)#f<DCRc2PEovj6ScEY%GtX+!Z9dv
z@|p-PXsnb=@7JN_U+Wa&H7uMPtMAe-NnMe%UL>j#!d>ipe^@9sp6l}#HieR!fyuu8
z!@0E9V6oe}tq+<I*XsL?$7RLkc>OOo<Z!n1GMYii_F_`+(KO`|iA4|$cp6|R=*6$-
zvhno)p6}PB;9oR){>OE3+tt>Oo0I5iaQLak;I}WHsS5B&A|wJtMf94LLMPvOxu9bO
z>~pl=-?^-^;PE<7dWVy;u+nL9hxx4J^|dzKeGRWkjlKP$@MyTxQ4HB6V<?U$Z>q=T
zDAXkFZ&Mf`60(#sdl`jVB2)zc6B}7XH*zXFFPApVr6eGR`va`Goeb0?OGt`9J3tt`
zvLU2@wfg7~#sJ3CDl`*^5^HHLnon!+Oz;1GzG?HN8`&5*h)CH=4RoKnT#~;TZB^v{
zE{n74t*_V)c!}R2t=ed`TecX*VC*wBI=sa%1AnyWumzaRWzk+F&0sDDC<j4}`rXF2
zjk1@w>i3?N#~WSZQd~NoN3p5rJWEQPKeLIur@+ku7?t>vGN53pLKsW`m7d6OedPMD
zE9G8+3{^}-P(K<ZpaoJWlWlJ<Hh*(vX-}4x`fV~TF*(Z`tg;e=!>RQ9-VYYU8ITOs
zDAtTk4J>m9rYjnOIk&oi00F|B5f*9r?adh`kOW7)Ckc1O1E?rVnE_Ya{;uyAzp&L(
z(slCluen;TR}t<C%aagH0z^bn%|$fp9tPTeiAp4)!>b@Dg())i2Q4jM_QmVHk5gG(
zGgqW{wpl(?Z|G+^&=V~+H(m$V?*bS5zu|x9vhEc|$I81GDUxL(paC_e)VpZtmq8>|
zK!}V)q00bp1Gd%1**deUf~9V?Cy)KF;qaGfUpHgCT@^Lm-TQfqn3z*2`;alpM~GAu
zbdtzgPMa}y`->RB{6MyOY)7k53a?w2f>D~UHl;P;1;79Rfa?>P=4joW-=nZir^uNr
z&YMTw=;b9m6GelDj?XJbjGi?-WFyu5D|So=szBAJ$hemy*>=CPJH<xRwdIH2?9enp
zfB*<23t%P{GMf!6X2UO1$dN>Y{)6l4Th}0UYv#5o=G6cMoeWY}LP8J#2lC$wFh#<+
zU=K^{1N5zwZ_J-_XY=Y?shrc<laG{nb$K{#qrZr}+Dh{e@YdQ%_vz04M06XB$dPqf
z_<Fu`{J0BmGLO>;Zlyu{bvLiA9%e-|gUVUETdLxwl^o&yp|HALfP7QP!2kdd9NUt5
zbfa5wWT!JklYphF^SPxJ%r|N8LudCxh?B#!zj<j`&qFxktbtY`bB%m_q~YGTxl38z
zE3V;`ep~A5)Axmy00Ik{%xspqQ$L6m772uC{M9Sc9OUdRu9zJ|G1)LdJLoZO1yVN6
z!dhqtwf_@-#NYrRMNnQl9l|@DEBh@=6iO!ImJYOm|9M~#Mb$ldmL9>4oTFfuN7?h;
zC8Kgk0uYr%6)hzDWamWC+h|i3*;4I5pa2Lnh%}|+FZ83gvkR#cAYnrDPMv}0HQWRn
zdLF{$W7H3u$;12B+&eZyBsfkf6Wh=L0D;ACn0;N8YJ5_P^W<|C1?$YKm<T8b<BUtb
zTE3Hs&;C%b<iMm-QVPQxUGcwZ!<HjOT-h||Yysi`01ys+9F!i6C-S=J1Uj8MrQmMB
z&#u`jaMD6Ug+r@H+o4~s*i57oj(G^&V{Yj;rZEBp0BC+|(x7mdg*2wL)o1IKzf7%Q
zWIzNCP7TV{RrTuLlf$E&dos==>1%lJDnKJ*D<KU&Tse0pS4Kd)UNz{!qZVKQ07E#u
zF{?M4Y}W%c0t1cGI%Se|3gA}bxP7sg1MKg9@}lFyWO3dO)vpKW002N;`tF0VhIFOm
zJ4Ug+7q0r-cA?nRe(F{RZS~=4HMDau=us{uw?n7Nyq-O@F#X-q;4eASJST63Mo{ix
z$`wd>JjC+R4(YktWU$IlqDTAltA@5yV=vXzH^VnCpN=1NUTvK7;Q0%dRstVmuJgWJ
zvn8)QmfusoL1AZQG~HddQ15z}(zV5WZF)nlEAsEL;LT$a+*!-b^2sgVRzP+wVa-K9
zRp!m{w7Z4qnq~St3)A%mEm<yheCC>FSsTYYNbX`NNCDMSm{MganikV)_7_v<p7pyg
zvcjb)BYJV{mNivbv$vJAteMTQblK#5krYjd>~ruCvu~6a*P<ZyV4TDZf&c-uNfU%5
zASa~2p@l!g0eIeTmYkj&A&1c|t!brONS0=_;d(&=&_e7_{j{fyOdG$o2M@b+Cyf(F
z>YVeO00>K^POnK189243D;oV=#f9zg?|2|<UQLY3BY9n-=HrgiC5eWc<sl-=IRF44
zJZE+V!xc7J+773rW|HMfdJbmGkAin7^;vKbHmC|1VtqF!tlDYspoL<n=dr1yFEv#;
zX3H$u=}5N(Q&AQCiq$Y%n|+)C1d?EEmsv{?<wqPoMOW;aD1>~|!)8{t7kB3eVIroQ
z4dpqeJ*+B-BPuI8iw(oE8FBzoKe(GpR?@hbWc)#vh-EmckBr@T5DiRT4QT)X2nYo0
za_i3o*)S6(dXo}D^ZIB7I98m195y!z>Klket*S6YP|ygoND&HS{$iESrg8Z^!KM{=
zzT^FE86y+Yqz87ogAS-6uRP?$S?AQCq{Df=N<(mq6EQW&00>e;pOE~363>^3rg_O5
zI~JEXO5+OqBZHI;xBvivnOTfnqXJFC+dG2j*m&>|X!4x9`yBaDLHD_=B~RD4PE=H+
zAoMSXuTr7RH+aKQ9m<SMvE2a(YL+W%C{YS@9YyY`kp#oBlQ@07WDcm-vV2=GhC>~l
z$k@dq5{N-bCDhG~x0iN2KE~#Awv^vXVaKAQ$-1XCARtu6`k4C)wLg6!PlO<VvQ(eH
zh7J{JsJtpuEw_7lKim}kfB;25b4L<Ui0Cr9j=ynbQ&7v@#8Z^=Xgxqi(?u5VxOqos
zfEa8DWJG)<C78ef1OseXvn2ZDyb@zl#*7mfmdinLZ8K5NluciXPdu(Ac^RU<ZKuX6
zTzGl;XZWZkQ`2@JL2pj6YxYcn3+N=$P-0ZA<n5E*Zcd3vD2nSZ@oo^au-W80uBR8j
zrwDA~o}WYA(MM{8q7j~`{F&U1kueH-bxhtr{i7{1_;Fg;004mTEYt_&b}$`@hf)Fv
zD@gCBS|G2rf6ratKV@Qv<E$)6lQP8<U$vIy?M+g6002Nh=c@~o+r4R4J=kB%+;b^y
z!(5(-ivR)%fC(747InGye2UtbGF%Z!9tjm20J|)vm(vX(1hhZuUG(?~0RM}*BAh5l
I1nDNZfLx91!~g&Q

literal 0
HcmV?d00001

diff --git a/man/binge_params.Rd b/man/binge_params.Rd
index e5d0750..d320c52 100644
--- a/man/binge_params.Rd
+++ b/man/binge_params.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{binge_params}
 \alias{binge_params}
-\title{Parameters to estimate amount drunk on single occassions}
+\title{Parameters to estimate amount drunk on single occasions}
 \format{
 A list of three data tables (1 = Table 3, 2 = Table 5, 3 = Table 6)
 }
@@ -24,6 +24,6 @@ Table 5 - Fitted Heckman selection model for probability that an individual drin
 Table 6 - Fitted Heckman outcome regression results for the standard deviation in the quantity of alcohol consumed in a drinking occasion   
 
 We do not use parameter estimates from Table 4 - Fitted Tobit regression model for the mean grams of alcohol consumed during a drinking occasion. 
-This is because we calculate from the data by dividing the weekly mean alcohol consumption by the estimated number of weekly drinking occassions.
+This is because we calculate from the data by dividing the weekly mean alcohol consumption by the estimated number of weekly drinking occasions.
 }
 \keyword{datasets}
diff --git a/man/binge_params_stapm.Rd b/man/binge_params_stapm.Rd
index d6dec25..83fa76b 100644
--- a/man/binge_params_stapm.Rd
+++ b/man/binge_params_stapm.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{binge_params_stapm}
 \alias{binge_params_stapm}
-\title{Parameters to estimate amount drunk on single occasions - STAPM version}
+\title{Parameters to estimate amount drunk on single occasions - England - STAPM version}
 \format{
 A list of four data tables (1 = Negative binomial regression model for the number of weekly drinking occasions, 
 2 = Fitted Heckman selection model for probability that an individual drinks on at least 3 separate occasions during the diary period, 
diff --git a/man/binge_params_stapm_scot.Rd b/man/binge_params_stapm_scot.Rd
new file mode 100644
index 0000000..81ea812
--- /dev/null
+++ b/man/binge_params_stapm_scot.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binge_params.R
+\docType{data}
+\name{binge_params_stapm_scot}
+\alias{binge_params_stapm_scot}
+\title{Parameters to estimate amount drunk on single occasions - Scotland - STAPM version}
+\format{
+A list of four data tables (1 = Negative binomial regression model for the number of weekly drinking occasions, 
+2 = Fitted Heckman selection model for probability that an individual drinks on at least 3 separate occasions during the diary period, 
+3 = Fitted Heckman outcome regression results for the standard deviation in the quantity of alcohol consumed in a drinking occasion, 
+4 = average height and weight)
+}
+\source{
+Hill-McManus et al 2014. "Estimation of usual occasion-based individual drinking patterns using diary survey data". https://doi.org/https://doi.org/10.1016/j.drugalcdep.2013.09.022.
+}
+\usage{
+binge_params_stapm_scot
+}
+\description{
+Scottish version of `binge_params_stapm` based on the Scottish Health Survey years 2008 to 2019.
+}
+\keyword{datasets}