-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add stapm version of alcohol binge parameters for Scotland
- Loading branch information
1 parent
816dcf7
commit 02f23f4
Showing
15 changed files
with
492 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,6 @@ dev/ | |
.DS_Store | ||
data-raw/binge_params/*.csv | ||
.rds | ||
data-raw/binge_params/Scotland/*.rds | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
|
||
The aim of this code is to process the parameters that allow the modelling of | ||
acute harms that are partially attributable to alcohol in STAPM | ||
|
||
# This is the SAPM method | ||
data_sapm_test <- tobalcepi::AlcBinge(data) | ||
|
||
adapt the method to predict the number of drinking occassions | ||
and the distribution of amount consumed | ||
as a function of the average weekly amount of alcohol consumed, age, sex and IMD quintile. | ||
|
||
assign the parameter values from Hill-McManus et al. to each individual | ||
and then average these values by age, sex and IMD quintile | ||
|
||
the IMD quintile averages will then reflect the variation among IMD quintiles | ||
in the distribution of ethnicity, income, number of children etc. | ||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
|
||
# The aim of this code is to clean the tobacco and alcohol data from the Scottish Health Survey | ||
|
||
# note: no questions asked to < 16 year olds in Shes | ||
|
||
# Using functions in the hseclean package | ||
library(hseclean) | ||
library(data.table) | ||
library(magrittr) | ||
|
||
# Location of Scottish data | ||
root_dir <- "X:/HAR_PR/PR/Consumption_TA/HSE/Scottish Health Survey (SHeS)/" | ||
|
||
# The variables to retain | ||
keep_vars = c( | ||
"hse_id", "wt_int", "psu", "cluster", "year", "quarter", | ||
"age", "age_cat", "sex", "imd_quintile", | ||
"ethnicity_2cat", | ||
"degree", "marstat", "relationship_status", "employ2cat", "social_grade", "kids", "income5cat", | ||
"nssec3_lab", "man_nonman", "activity_lstweek", "eduend4cat", | ||
|
||
"hse_mental", | ||
|
||
"weight", "height", | ||
|
||
"drinks_now", | ||
"drink_freq_7d", "n_days_drink", "peakday", "binge_cat", | ||
"beer_units", "wine_units", "spirit_units", "rtd_units", | ||
"weekmean", "drating", "dnoft", "dnnow", "dnany", "dnevr", | ||
"perc_spirit_units", "perc_wine_units", "perc_rtd_units", "perc_beer_units", | ||
"drinker_cat", | ||
#"spirits_pref_cat", "wine_pref_cat", "rtd_pref_cat", "beer_pref_cat", | ||
"total_units7_ch", | ||
|
||
# Smoking | ||
"cig_smoker_status", | ||
"years_since_quit", "years_reg_smoker", "cig_ever", | ||
"smk_start_age", "smk_stop_age", "censor_age", | ||
"cigs_per_day", "smoker_cat", "hand_rolled_per_day", "machine_rolled_per_day", "prop_handrolled", "cig_type") | ||
|
||
|
||
# Main processing | ||
|
||
cleandata <- function(data) { | ||
|
||
data <- clean_age(data) | ||
data <- clean_demographic(data) | ||
data <- clean_education(data) | ||
data <- clean_economic_status(data) | ||
data <- clean_family(data) | ||
data <- clean_income(data) | ||
data <- clean_health_and_bio(data) | ||
|
||
data <- alc_drink_now_allages(data) | ||
data <- alc_weekmean_adult(data) | ||
data <- alc_sevenday_adult(data) | ||
|
||
data <- smk_status(data) | ||
data <- smk_former(data) | ||
data <- smk_life_history(data) | ||
data <- smk_amount(data) | ||
|
||
data <- select_data( | ||
data, | ||
ages = 16:89, | ||
years = 2008:2019, | ||
keep_vars = keep_vars, | ||
complete_vars = c("age", "sex", "imd_quintile", "psu", "cluster", "year") | ||
) | ||
|
||
return(data) | ||
} | ||
|
||
shes_data <- combine_years(list( | ||
cleandata(read_SHeS_2008(root = root_dir)), | ||
cleandata(read_SHeS_2009(root = root_dir)), | ||
cleandata(read_SHeS_2010(root = root_dir)), | ||
cleandata(read_SHeS_2011(root = root_dir)), | ||
cleandata(read_SHeS_2012(root = root_dir)), | ||
cleandata(read_SHeS_2013(root = root_dir)), | ||
cleandata(read_SHeS_2014(root = root_dir)), | ||
cleandata(read_SHeS_2015(root = root_dir)), | ||
cleandata(read_SHeS_2016(root = root_dir)), | ||
cleandata(read_SHeS_2017(root = root_dir)), | ||
cleandata(read_SHeS_2018(root = root_dir)), | ||
cleandata(read_SHeS_2019(root = root_dir)) | ||
)) | ||
|
||
# Load population data for Scotland | ||
# from here - X:\ScHARR\PR_Mortality_data_TA\data\Processed pop sizes and death rates from VM | ||
|
||
scot_pops <- fread("X:/ScHARR/PR_Mortality_data_TA/data/Processed pop sizes and death rates from VM/pop_sizes_scotland_national_v1_2022-12-13_mort.tools_1.5.0.csv") | ||
setnames(scot_pops, c("pops"), c("N")) | ||
|
||
# adjust the survey weights according to the ratio of the real population to the sampled population | ||
shes_data <- clean_surveyweights(shes_data, pop_data = scot_pops) | ||
|
||
# remake age categories | ||
shes_data[, age_cat := c("16-17", | ||
"18-24", | ||
"25-34", | ||
"35-44", | ||
"45-54", | ||
"55-64", | ||
"65-74", | ||
"75-89")[findInterval(age, c(-1, 18, 25, 35, 45, 55, 65, 75, 1000))]] | ||
|
||
setnames(shes_data, | ||
c("smk_start_age", "cig_smoker_status", "years_since_quit"), | ||
c("start_age", "smk.state", "time_since_quit")) | ||
|
||
# Checks on data | ||
|
||
shes_data[(spirit_units + wine_units + rtd_units + beer_units) != weekmean] | ||
|
||
shes_data[drinker_cat != "abstainer" & weekmean == 0] | ||
|
||
# some drinkers have no data for average weekly consumption | ||
# remove these individuals from the dataset - rather than imputing the missing data | ||
shes_data <- shes_data[!(drinker_cat != "abstainer" & weekmean == 0)] | ||
|
||
shes_data[drinker_cat != "abstainer" & is.na(weekmean)] | ||
|
||
# select only rows with complete information on average weekly alcohol consumption | ||
shes_data <- shes_data[!is.na(weekmean)] | ||
|
||
shes_data[smoker_cat != "non_smoker" & cigs_per_day == 0] | ||
|
||
# select only rows with complete information on average weekly alcohol consumption | ||
shes_data <- shes_data[!is.na(smk.state)] | ||
|
||
nrow(shes_data) | ||
|
||
#shes_data[is.na(drinker_cat)] | ||
|
||
######## Write the data | ||
|
||
# note the package version so that the data can be tagged with it | ||
ver <- packageVersion("hseclean") | ||
|
||
saveRDS(shes_data, paste0("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_", Sys.Date(), "_hseclean_", ver, ".rds")) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
|
||
# This code reads the processed health survey datasets for each year | ||
# and conducts imputation. | ||
|
||
# all the variables to be imputed are categorical | ||
|
||
library(data.table) | ||
library(hseclean) | ||
|
||
# Load the data | ||
|
||
# choose the file output by 10_clean_shes.R | ||
|
||
data <- readRDS("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_2022-12-18_hseclean_1.9.2.rds") | ||
|
||
# sapply(data, class) | ||
|
||
# view variables with missingness | ||
misscheck <- function(var) { | ||
x <- table(var, useNA = "ifany") | ||
na <- x[which(is.na(names(x)))] | ||
if(length(na) == 0) na <- 0 | ||
perc <- round(100 * na / sum(x), 2) | ||
#return(c(paste0(na, " missing obs, ", perc, "%"))) | ||
return(na) | ||
} | ||
|
||
n_missing <- sapply(data, misscheck) | ||
missing_vars <- n_missing[which(n_missing > 0)] | ||
missing_vars | ||
|
||
# household equivalised income has the most missingness | ||
|
||
# The categorical variables involved | ||
var_names <- c( | ||
"kids", | ||
"relationship_status", | ||
"ethnicity_2cat", | ||
"eduend4cat", | ||
"degree", | ||
"nssec3_lab", | ||
"employ2cat",# complete variable | ||
"activity_lstweek",# complete variable | ||
"income5cat", | ||
"hse_mental", | ||
"drinker_cat", | ||
"social_grade", | ||
"smk.state" | ||
) | ||
|
||
# Note that the imputation wont work unless the variables considered are | ||
# either subject to imputation or do not contain any missingness (i.e. are complete) | ||
|
||
# The variables to be imputed and the method to be used | ||
var_methods <- rep("", ncol(data)) | ||
|
||
var_methods[which(var_names == "kids")] <- "polr" | ||
var_methods[which(var_names == "relationship_status")] <- "polyreg" | ||
var_methods[which(var_names == "ethnicity_2cat")] <- "logreg" | ||
var_methods[which(var_names == "eduend4cat")] <- "polyreg" | ||
var_methods[which(var_names == "degree")] <- "logreg" | ||
var_methods[which(var_names == "nssec3_lab")] <- "polyreg" | ||
var_methods[which(var_names == "income5cat")] <- "polr" | ||
var_methods[which(var_names == "hse_mental")] <- "logreg" | ||
var_methods[which(var_names == "social_grade")] <- "polyreg" | ||
|
||
|
||
# Set order of factors where needed for imputing as ordered. | ||
data[ , kids := factor(kids, levels = c("0", "1", "2", "3+"))] | ||
data[ , income5cat := factor(income5cat, levels = c("1_lowest_income", "2", "3", "4", "5_highest_income"))] | ||
|
||
|
||
# Impute missing values | ||
|
||
# Run the imputation | ||
imp <- impute_data_mice( | ||
data = data, | ||
var_names = var_names, | ||
var_methods = var_methods, | ||
n_imputations = 5 | ||
# for testing just do 1 imputation | ||
# but test with more later | ||
# for point estimates, apparently 2-10 imputations are enough | ||
) | ||
|
||
data_imp <- copy(imp$data) | ||
|
||
|
||
# note the package version so that the data can be tagged with it | ||
ver <- packageVersion("hseclean") | ||
|
||
saveRDS(data_imp, paste0("X:/ScHARR/PR_STAPM/Code/R_packages/tobalcepi/data-raw/binge_params/Scotland/tobalc_consumption_scot_national_2008-2019_v1_", Sys.Date(), "_hseclean_", ver, "_imputed.rds")) | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.