Skip to content

Commit

Permalink
Update read functions to use renamed quarterly LFS datasets from 2014 onwards
Browse files Browse the repository at this point in the history
  • Loading branch information
djmorris1989 committed Oct 4, 2023
1 parent 15420a4 commit 301d6b1
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 119 deletions.
2 changes: 1 addition & 1 deletion R/lfs_read_2014.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2014 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js14_end_user.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js14_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
2 changes: 1 addition & 1 deletion R/lfs_read_2015.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2015 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js15_eul.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js15_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
2 changes: 1 addition & 1 deletion R/lfs_read_2016.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2016 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js16_eul.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js16_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
2 changes: 1 addition & 1 deletion R/lfs_read_2017.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2017 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js17_eul.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js17_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
2 changes: 1 addition & 1 deletion R/lfs_read_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2018 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js18_eul.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js18_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
2 changes: 1 addition & 1 deletion R/lfs_read_2019.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ lfs_read_2019 <- function(

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path ,"/lfsp_js19_eul.tab"), showProgress = FALSE,
paste0(path ,"/lfsp_js19_eul_pwt18.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down
74 changes: 11 additions & 63 deletions R/lfs_read_2020.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,25 @@ lfs_read_2020 <- function(
###### Read in each quarter
cat(crayon::green("\tJan - Mar..."))
data.q1 <- data.table::fread(
paste0(path, "/lfsp_jm20_eul_pwt18.tab"), showProgress = FALSE,
paste0(path, "/lfsp_jm20_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tApr - Jun..."))
data.q2 <- data.table::fread(
paste0(path, "/lfsp_aj20_eul_pwt18.tab"), showProgress = FALSE,
paste0(path, "/lfsp_aj20_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path, "/lfsp_js20_eul_pwt18.tab"), showProgress = FALSE,
paste0(path, "/lfsp_js20_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tOct - Dec..."))
data.q4 <- data.table::fread(
paste0(path, "/lfsp_od20_eul_pwt20.tab"), showProgress = FALSE,
paste0(path, "/lfsp_od20_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)
cat(crayon::yellow("\tdone\n"))
Expand All @@ -50,73 +50,18 @@ lfs_read_2020 <- function(
clean.data.list <- list()

##### loop the cleaning function over the four quarters
for (l in c(1:3)) {
for (l in c(1:4)) {
data <- data.list[[l]]

setnames(data, names(data), tolower(names(data)))

weights_vars <- Hmisc::Cs(pwt18,piwt18)
weights_vars <- Hmisc::Cs(pwt22,piwt22)
demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta)
education_vars <- Hmisc::Cs(edage,hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5,
gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44,
qual_1,qual_2,qual_3,qual_4,qual_5,qual_6,qual_7,qual_8,qual_9,qual_10,qual_11,qual_12,qual_13,qual_14,
qual_15,qual_16,qual_17,qual_18,qual_19,qual_20,qual_21,qual_22,qual_23,qual_24,qual_25,qual_26,qual_27,
qual_28,qual_29,qual_30,qual_31)
work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc10m,sc10mmn,
undemp,undhrs,ovhrs,lespay2)
other_vars <- Hmisc::Cs(refwkm,thiswv)

names <- c(demographic_vars,education_vars,work_vars, weights_vars,other_vars)
names <- tolower(names)

data <- data[ ,names, with=F]

data$quarter <- l
data$year <- 2020

### tidy data

# rename variables which have names which change over time but don't need cleaning separately, and variables
# which don't change over time at all.

data.table::setnames(data, c("refwkm", "pwt18","piwt18","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5",
"qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10",
"qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22",
"qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"),
c("month", "pwt", "piwt","btec","sctvec","gnvq","nvqlev","rsa","candg","numol",
"qf1","qf2","qf3","qf4","qf5","qf6","qf7","qf8","qf9","qf10",
"qf11","qf12","qf13","qf14", "qf15","qf16","qf17","qf18","qf19","qf20","qf21","qf22",
"qf23","qf24","qf25","qf26","qf27","qf28","qf29","qf30","qf31") )

#preliminary cleaning of the vocational qualification variables
data[btec %in% c(5,6), btec := NA]
data[sctvec %in% c(6,7), sctvec := NA]
data[gnvq %in% c(6,7), gnvq := NA]
data[nvqlev %in% c(6,7), nvqlev := NA]
data[rsa %in% c(5,6), rsa := NA]
data[candg %in% c(4,5), candg := NA]
data[numol %in% c(3), numol := NA]
data[numal %in% c(3), numal := NA]
data[numas %in% c(4), numas := NA]
data[hst %in% c(3), hst := NA]
data[advhst %in% c(3), advhst := NA]

clean.data.list[[l]] <- data
}

##### loop the cleaning function over the four quarters
for (l in c(4)) {
data <- data.list[[l]]

setnames(data, names(data), tolower(names(data)))

weights_vars <- Hmisc::Cs(pwt20,piwt20)
demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta)
education_vars <- Hmisc::Cs(hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5,
gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44,
qual_1,qual_2,qual_3,qual_4,qual_5,qual_6,qual_7,qual_8,qual_9,qual_10,qual_11,qual_12,qual_13,qual_14,
qual_15,qual_16,qual_17,qual_18,qual_19,qual_20,qual_21,qual_22,qual_23,qual_24,qual_25,qual_26,qual_27,
qual_28,qual_29,qual_30,qual_31)
work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc10m,sc10mmn)
other_vars <- Hmisc::Cs(refwkm,thiswv)

Expand All @@ -133,7 +78,7 @@ lfs_read_2020 <- function(
# rename variables which have names which change over time but don't need cleaning separately, and variables
# which don't change over time at all.

data.table::setnames(data, c("refwkm", "pwt20","piwt20","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5",
data.table::setnames(data, c("refwkm", "pwt22","piwt22","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5",
"qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10",
"qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22",
"qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"),
Expand All @@ -160,7 +105,10 @@ lfs_read_2020 <- function(

### combine quarters into a single data table

data <- rbind(clean.data.list[[1]],clean.data.list[[2]],clean.data.list[[3]],clean.data.list[[4]], fill=TRUE)
data <- rbind(clean.data.list[[1]],
clean.data.list[[2]],
clean.data.list[[3]],
clean.data.list[[4]], fill=TRUE)

data <- data.table(data)

Expand Down
12 changes: 6 additions & 6 deletions R/lfs_read_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,25 @@ lfs_read_2021 <- function(
###### Read in each quarter
cat(crayon::green("\tJan - Mar..."))
data.q1 <- data.table::fread(
paste0(path,"/lfsp_jm21_eul_pwt20.tab"), showProgress = FALSE,
paste0(path,"/lfsp_jm21_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tApr - Jun..."))
data.q2 <- data.table::fread(
paste0(path,"/lfsp_aj21_eul_pwt20.tab"), showProgress = FALSE,
paste0(path,"/lfsp_aj21_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tJul - Sep..."))
data.q3 <- data.table::fread(
paste0(path,"/lfsp_js21_eul_pwt20.tab"), showProgress = FALSE,
paste0(path,"/lfsp_js21_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

cat(crayon::green("\tOct - Dec..."))
data.q4 <- data.table::fread(
paste0(path,"/lfsp_od21_eul_pwt20.tab"), showProgress = FALSE,
paste0(path,"/lfsp_od21_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)
cat(crayon::yellow("\tdone\n"))
Expand All @@ -55,7 +55,7 @@ lfs_read_2021 <- function(

setnames(data, names(data), tolower(names(data)))

weights_vars <- Hmisc::Cs(pwt20,piwt20)
weights_vars <- Hmisc::Cs(pwt22,piwt22)
demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta)
education_vars <- Hmisc::Cs(edage,hiqul15d,hiqual15,bte11,sctvc11,gnvq11,nvq11,rsa11,cag11,numol5,numal,numas,hst,advhst,typhst1,typhst2,typhst3,typhst4,typhst5,
gcseful1,gcseful2,gcseful3,gcseful4,qgcse41,qgcse42,qgcse43,qgcse44,
Expand All @@ -79,7 +79,7 @@ lfs_read_2021 <- function(
# rename variables which have names which change over time but don't need cleaning separately, and variables
# which don't change over time at all.

data.table::setnames(data, c("refwkm", "pwt20","piwt20","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5",
data.table::setnames(data, c("refwkm", "pwt22","piwt22","bte11","sctvc11","gnvq11","nvq11","rsa11","cag11","numol5",
"qual_1","qual_2","qual_3","qual_4","qual_5","qual_6","qual_7","qual_8","qual_9","qual_10",
"qual_11","qual_12","qual_13","qual_14","qual_15","qual_16","qual_17","qual_18","qual_19","qual_20","qual_21","qual_22",
"qual_23","qual_24","qual_25","qual_26","qual_27","qual_28","qual_29","qual_30","qual_31"),
Expand Down
36 changes: 2 additions & 34 deletions R/lfs_read_2022.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ lfs_read_2022 <- function(
###### Read in each quarter
cat(crayon::green("\tJan - Mar..."))
data.q1 <- data.table::fread(
paste0(path,"/lfsp_jm22_eul_pwt20.tab"), showProgress = FALSE,
paste0(path,"/lfsp_jm22_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

Expand Down Expand Up @@ -50,39 +50,7 @@ lfs_read_2022 <- function(
clean.data.list <- list()

##### loop the cleaning function over the four quarters
for (l in c(1)) {
data <- data.list[[l]]

setnames(data, names(data), tolower(names(data)))

weights_vars <- Hmisc::Cs(pwt20,piwt20)
demographic_vars <- Hmisc::Cs(age,sex,gor9d,ethukeul,marsta)
education_vars <- Hmisc::Cs(edage,hiqul22d,hiqual22)
work_vars <- Hmisc::Cs(inecac05,grsswk,ftptwk,ttachr,ttushr,mpnr02,publicr,indc07m,indd07m,inds07m,soc20m,sc20mmn,
undemp,undhrs,ovhrs,lespay2)
other_vars <- Hmisc::Cs(refwkm,thiswv)

names <- c(demographic_vars,education_vars,work_vars, weights_vars,other_vars)
names <- tolower(names)

data <- data[ ,names, with=F]

data$quarter <- l
data$year <- 2022

### tidy data

# rename variables which have names which change over time but don't need cleaning separately, and variables
# which don't change over time at all.

data.table::setnames(data,
c("refwkm", "pwt20","piwt20"),
c("month", "pwt", "piwt") )

clean.data.list[[l]] <- data
}

for (l in c(2:4)) {
for (l in c(1:4)) {
data <- data.list[[l]]

setnames(data, names(data), tolower(names(data)))
Expand Down
21 changes: 11 additions & 10 deletions R/lfs_read_2023.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,33 @@ lfs_read_2023 <- function(
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)

# cat(crayon::green("\tApr - Jun..."))
# data.q2 <- data.table::fread(
# paste0(path,"/lfsp_aj21_eul_pwt20.tab"), showProgress = FALSE,
# na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
# )
cat(crayon::green("\tApr - Jun..."))
data.q2 <- data.table::fread(
paste0(path,"/lfsp_aj23_eul_pwt22.tab"), showProgress = FALSE,
na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
)
#
# cat(crayon::green("\tJul - Sep..."))
# data.q3 <- data.table::fread(
# paste0(path,"/lfsp_js21_eul_pwt20.tab"), showProgress = FALSE,
# paste0(path,"/lfsp_js23_eul_pwt22.tab"), showProgress = FALSE,
# na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
# )
#
# cat(crayon::green("\tOct - Dec..."))
# data.q4 <- data.table::fread(
# paste0(path,"/lfsp_od21_eul_pwt20.tab"), showProgress = FALSE,
# paste0(path,"/lfsp_od23_eul_pwt22.tab"), showProgress = FALSE,
# na.strings = c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")
# )
cat(crayon::yellow("\tdone\n"))

###### group data tables into a list and initialize a list to store cleaned data tables in

data.list <- list(data.q1)
data.list <- list(data.q1, data.q2)

clean.data.list <- list()

##### loop the cleaning function over the quarters read in so far
for (l in c(1)) {
for (l in c(1:2)) {
data <- data.list[[l]]

setnames(data, names(data), tolower(names(data)))
Expand Down Expand Up @@ -85,7 +85,8 @@ lfs_read_2023 <- function(

### combine quarters into a single data table

data <- rbind(clean.data.list[[1]], fill=TRUE)
data <- rbind(clean.data.list[[1]],
clean.data.list[[2]], fill=TRUE)

data <- setDT(data)

Expand Down

0 comments on commit 301d6b1

Please sign in to comment.