From 301d6b14d4133b9110cb4f5fcecd95df898632fd Mon Sep 17 00:00:00 2001 From: Damon Morris Date: Wed, 4 Oct 2023 11:09:22 +0100 Subject: [PATCH] update read functions to use renamed quarterly LFS datasets from 2014- --- R/lfs_read_2014.R | 2 +- R/lfs_read_2015.R | 2 +- R/lfs_read_2016.R | 2 +- R/lfs_read_2017.R | 2 +- R/lfs_read_2018.R | 2 +- R/lfs_read_2019.R | 2 +- R/lfs_read_2020.R | 74 +++++++---------------------------------------- R/lfs_read_2021.R | 12 ++++---- R/lfs_read_2022.R | 36 ++--------------------- R/lfs_read_2023.R | 21 +++++++------- 10 files changed, 36 insertions(+), 119 deletions(-) diff --git a/R/lfs_read_2014.R b/R/lfs_read_2014.R index 8008da0..879072b 100644 --- a/R/lfs_read_2014.R +++ b/R/lfs_read_2014.R @@ -32,7 +32,7 @@ lfs_read_2014 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js14_end_user.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js14_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2015.R b/R/lfs_read_2015.R index 5f691ff..af1311d 100644 --- a/R/lfs_read_2015.R +++ b/R/lfs_read_2015.R @@ -32,7 +32,7 @@ lfs_read_2015 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js15_eul.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js15_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2016.R b/R/lfs_read_2016.R index 88abe8f..21ee06e 100644 --- a/R/lfs_read_2016.R +++ b/R/lfs_read_2016.R @@ -32,7 +32,7 @@ lfs_read_2016 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js16_eul.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js16_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2017.R b/R/lfs_read_2017.R index 3b1c599..4f5d7f0 100644 --- a/R/lfs_read_2017.R +++ b/R/lfs_read_2017.R @@ -32,7 +32,7 @@ lfs_read_2017 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js17_eul.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js17_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2018.R b/R/lfs_read_2018.R index fc8eb51..62b9569 100644 --- a/R/lfs_read_2018.R +++ b/R/lfs_read_2018.R @@ -32,7 +32,7 @@ lfs_read_2018 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js18_eul.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js18_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2019.R b/R/lfs_read_2019.R index 00a4c9a..90d9964 100644 --- a/R/lfs_read_2019.R +++ b/R/lfs_read_2019.R @@ -32,7 +32,7 @@ lfs_read_2019 <- function( cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path ,"/lfsp_js19_eul.tab"), showProgress = FALSE, + paste0(path ,"/lfsp_js19_eul_pwt18.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) diff --git a/R/lfs_read_2020.R b/R/lfs_read_2020.R index 133f144..cfaeb76 100644 --- a/R/lfs_read_2020.R +++ b/R/lfs_read_2020.R @@ -20,25 +20,25 @@ lfs_read_2020 <- function( ###### Read in each quarter cat(crayon::green("\tJan - Mar...")) data.q1 <- data.table::fread( - paste0(path, "/lfsp_jm20_eul_pwt18.tab"), showProgress = FALSE, + paste0(path, "/lfsp_jm20_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tApr - Jun...")) data.q2 <- data.table::fread( - paste0(path, "/lfsp_aj20_eul_pwt18.tab"), showProgress = FALSE, + paste0(path, "/lfsp_aj20_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path, "/lfsp_js20_eul_pwt18.tab"), showProgress = FALSE, + paste0(path, "/lfsp_js20_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tOct - Dec...")) data.q4 <- data.table::fread( - paste0(path, "/lfsp_od20_eul_pwt20.tab"), showProgress = FALSE, + paste0(path, "/lfsp_od20_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::yellow("\tdone\n")) @@ -50,73 +50,18 @@ lfs_read_2020 <- function( clean.data.list <- list() ##### loop the cleaning function over the four quarters - for (l in c(1:3)) { + for (l in c(1:4)) { data <- data.list[[l]] setnames(data, names(data), tolower(names(data))) - weights_vars <- Hmisc::Cs(pwt18,piwt18) + weights_vars <- Hmisc::Cs(pwt22,piwt22) demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta) education_vars <- Hmisc::Cs(edage,hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5, gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44, qual_1,qual_2,qual_3,qual_4,qual_5,qual_6,qual_7,qual_8,qual_9,qual_10,qual_11,qual_12,qual_13,qual_14, qual_15,qual_16,qual_17,qual_18,qual_19,qual_20,qual_21,qual_22,qual_23,qual_24,qual_25,qual_26,qual_27, qual_28,qual_29,qual_30,qual_31) - work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc10m,sc10mmn, - undemp,undhrs,ovhrs,lespay2) - other_vars <- Hmisc::Cs(refwkm,thiswv) - - names <- c(demographic_vars,education_vars,work_vars, weights_vars,other_vars) - names <- tolower(names) - - data <- data[ ,names, with=F] - - data$quarter <- l - data$year <- 2020 - - ### tidy data - - # rename variables which have names which change over time but don't need cleaning separately, and variables - # which don't change over time at all. - - data.table::setnames(data, c("refwkm", "pwt18","piwt18","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5", - "qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10", - "qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22", - "qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"), - c("month", "pwt", "piwt","btec","sctvec","gnvq","nvqlev","rsa","candg","numol", - "qf1","qf2","qf3","qf4","qf5","qf6","qf7","qf8","qf9","qf10", - "qf11","qf12","qf13","qf14", "qf15","qf16","qf17","qf18","qf19","qf20","qf21","qf22", - "qf23","qf24","qf25","qf26","qf27","qf28","qf29","qf30","qf31") ) - - #preliminary cleaning of the vocational qualification variables - data[btec %in% c(5,6), btec := NA] - data[sctvec %in% c(6,7), sctvec := NA] - data[gnvq %in% c(6,7), gnvq := NA] - data[nvqlev %in% c(6,7), nvqlev := NA] - data[rsa %in% c(5,6), rsa := NA] - data[candg %in% c(4,5), candg := NA] - data[numol %in% c(3), numol := NA] - data[numal %in% c(3), numal := NA] - data[numas %in% c(4), numas := NA] - data[hst %in% c(3), hst := NA] - data[advhst %in% c(3), advhst := NA] - - clean.data.list[[l]] <- data - } - - ##### loop the cleaning function over the four quarters - for (l in c(4)) { - data <- data.list[[l]] - - setnames(data, names(data), tolower(names(data))) - - weights_vars <- Hmisc::Cs(pwt20,piwt20) - demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta) - education_vars <- Hmisc::Cs(hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5, - gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44, - qual_1,qual_2,qual_3,qual_4,qual_5,qual_6,qual_7,qual_8,qual_9,qual_10,qual_11,qual_12,qual_13,qual_14, - qual_15,qual_16,qual_17,qual_18,qual_19,qual_20,qual_21,qual_22,qual_23,qual_24,qual_25,qual_26,qual_27, - qual_28,qual_29,qual_30,qual_31) work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc10m,sc10mmn) other_vars <- Hmisc::Cs(refwkm,thiswv) @@ -133,7 +78,7 @@ lfs_read_2020 <- function( # rename variables which have names which change over time but don't need cleaning separately, and variables # which don't change over time at all. - data.table::setnames(data, c("refwkm", "pwt20","piwt20","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5", + data.table::setnames(data, c("refwkm", "pwt22","piwt22","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5", "qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10", "qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22", "qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"), @@ -160,7 +105,10 @@ lfs_read_2020 <- function( ### combine quarters into a single data table - data <- rbind(clean.data.list[[1]],clean.data.list[[2]],clean.data.list[[3]],clean.data.list[[4]], fill=TRUE) + data <- rbind(clean.data.list[[1]], + clean.data.list[[2]], + clean.data.list[[3]], + clean.data.list[[4]], fill=TRUE) data <- data.table(data) diff --git a/R/lfs_read_2021.R b/R/lfs_read_2021.R index 0a17067..597cdfa 100644 --- a/R/lfs_read_2021.R +++ b/R/lfs_read_2021.R @@ -20,25 +20,25 @@ lfs_read_2021 <- function( ###### Read in each quarter cat(crayon::green("\tJan - Mar...")) data.q1 <- data.table::fread( - paste0(path,"/lfsp_jm21_eul_pwt20.tab"), showProgress = FALSE, + paste0(path,"/lfsp_jm21_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tApr - Jun...")) data.q2 <- data.table::fread( - paste0(path,"/lfsp_aj21_eul_pwt20.tab"), showProgress = FALSE, + paste0(path,"/lfsp_aj21_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tJul - Sep...")) data.q3 <- data.table::fread( - paste0(path,"/lfsp_js21_eul_pwt20.tab"), showProgress = FALSE, + paste0(path,"/lfsp_js21_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::green("\tOct - Dec...")) data.q4 <- data.table::fread( - paste0(path,"/lfsp_od21_eul_pwt20.tab"), showProgress = FALSE, + paste0(path,"/lfsp_od21_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) cat(crayon::yellow("\tdone\n")) @@ -55,7 +55,7 @@ lfs_read_2021 <- function( setnames(data, names(data), tolower(names(data))) - weights_vars <- Hmisc::Cs(pwt20,piwt20) + weights_vars <- Hmisc::Cs(pwt22,piwt22) demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta) education_vars <- Hmisc::Cs(edage,hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5, gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44, @@ -79,7 +79,7 @@ lfs_read_2021 <- function( # rename variables which have names which change over time but don't need cleaning separately, and variables # which don't change over time at all. - data.table::setnames(data, c("refwkm", "pwt20","piwt20","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5", + data.table::setnames(data, c("refwkm", "pwt22","piwt22","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5", "qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10", "qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22", "qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"), diff --git a/R/lfs_read_2022.R b/R/lfs_read_2022.R index 8614b51..8784625 100644 --- a/R/lfs_read_2022.R +++ b/R/lfs_read_2022.R @@ -20,7 +20,7 @@ lfs_read_2022 <- function( ###### Read in each quarter cat(crayon::green("\tJan - Mar...")) data.q1 <- data.table::fread( - paste0(path,"/lfsp_jm22_eul_pwt20.tab"), showProgress = FALSE, + paste0(path,"/lfsp_jm22_eul_pwt22.tab"), showProgress = FALSE, na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) @@ -50,39 +50,7 @@ lfs_read_2022 <- function( clean.data.list <- list() ##### loop the cleaning function over the four quarters - for (l in c(1)) { - data <- data.list[[l]] - - setnames(data, names(data), tolower(names(data))) - - weights_vars <- Hmisc::Cs(pwt20,piwt20) - demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta) - education_vars <- Hmisc::Cs(edage,hiqul22d,hiqual22) - work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc20m,sc20mmn, - undemp,undhrs,ovhrs,lespay2) - other_vars <- Hmisc::Cs(refwkm,thiswv) - - names <- c(demographic_vars,education_vars,work_vars, weights_vars,other_vars) - names <- tolower(names) - - data <- data[ ,names, with=F] - - data$quarter <- l - data$year <- 2022 - - ### tidy data - - # rename variables which have names which change over time but don't need cleaning separately, and variables - # which don't change over time at all. - - data.table::setnames(data, - c("refwkm", "pwt20","piwt20"), - c("month", "pwt", "piwt") ) - - clean.data.list[[l]] <- data - } - - for (l in c(2:4)) { + for (l in c(1:4)) { data <- data.list[[l]] setnames(data, names(data), tolower(names(data))) diff --git a/R/lfs_read_2023.R b/R/lfs_read_2023.R index 93ecce3..5f6f32c 100644 --- a/R/lfs_read_2023.R +++ b/R/lfs_read_2023.R @@ -24,33 +24,33 @@ lfs_read_2023 <- function( na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") ) - # cat(crayon::green("\tApr - Jun...")) - # data.q2 <- data.table::fread( - # paste0(path,"/lfsp_aj21_eul_pwt20.tab"), showProgress = FALSE, - # na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") - # ) + cat(crayon::green("\tApr - Jun...")) + data.q2 <- data.table::fread( + paste0(path,"/lfsp_aj23_eul_pwt22.tab"), showProgress = FALSE, + na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") + ) # # cat(crayon::green("\tJul - Sep...")) # data.q3 <- data.table::fread( - # paste0(path,"/lfsp_js21_eul_pwt20.tab"), showProgress = FALSE, + # paste0(path,"/lfsp_js23_eul_pwt22.tab"), showProgress = FALSE, # na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") # ) # # cat(crayon::green("\tOct - Dec...")) # data.q4 <- data.table::fread( - # paste0(path,"/lfsp_od21_eul_pwt20.tab"), showProgress = FALSE, + # paste0(path,"/lfsp_od23_eul_pwt22.tab"), showProgress = FALSE, # na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A") # ) cat(crayon::yellow("\tdone\n")) ###### group data tables into a list and initialize a list to store cleaned data tables in - data.list <- list(data.q1) + data.list <- list(data.q1, data.q2) clean.data.list <- list() ##### loop the cleaning function over the four quarters - for (l in c(1)) { + for (l in c(1:2)) { data <- data.list[[l]] setnames(data, names(data), tolower(names(data))) @@ -85,7 +85,8 @@ lfs_read_2023 <- function( ### combine quarters into a single data table - data <- rbind(clean.data.list[[1]], fill=TRUE) + data <- rbind(clean.data.list[[1]], + clean.data.list[[2]], fill=TRUE) data <- setDT(data)