Skip to content

Commit

Permalink
add extra waves of LFS longitudinal data
Browse files Browse the repository at this point in the history
  • Loading branch information
djmorris1989 committed Sep 17, 2024
1 parent 23f03d6 commit 3085583
Show file tree
Hide file tree
Showing 25 changed files with 1,760 additions and 56 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Description: Reads in raw Labour Force Survey data obtained under the End User L
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.0
RoxygenNote: 7.3.2
Depends:
R (>= 3.6.1)
Imports:
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ export(lfs_read_2020)
export(lfs_read_2021)
export(lfs_read_2022)
export(lfs_read_2023)
export(lfs_read_5q_2012)
export(lfs_read_5q_2013)
export(lfs_read_5q_2014)
export(lfs_read_5q_2015)
export(lfs_read_5q_2016)
export(lfs_read_5q_2017)
export(lfs_read_5q_2018)
export(lfs_read_5q_2019)
export(lfs_read_5q_2020)
export(lfs_read_5q_2021)
export(lfs_read_5q_2022)
Expand Down
156 changes: 156 additions & 0 deletions R/lfs_read_5q_2012.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Read LFS 2012 longitudinal
#'
#' Reads and performs basic cleaning on the Labour Force Survey five-quarter
#' longitudinal data with a 1st wave that began in calendar year 2012.
#'
#' Four tab-delimited files are read, one for each starting quarter of 2012.
#' For every file the column names are lower-cased, the weight `lgwt17` and
#' the qualification variables `hiqul11d1`-`hiqul11d5` are renamed to `lgwt22`
#' and `hiqul22d1`-`hiqul22d5`, and only a fixed set of id, weight,
#' demographic, education, employment-status and work variables is kept. The
#' four quarters are then stacked and all-`NA` placeholder columns are added
#' for the variables that are not read for this year.
#'
#' @param root Character - the root directory. Only the first element is used.
#' @param file Character - the path, relative to `root`, of the directory that
#' contains the five-quarter longitudinal `.tab` files.
#'
#' @return Returns a data table holding the four stacked quarters, with the
#' selected variables, `quarter` (position of the file, 1 to 4), `year`
#' (always 2012) and the all-`NA` placeholder columns.
#' @export
lfs_read_5q_2012 <- function(
    root = c("X:/"),
    file = "HAR_PR/PR/LFS/Data/longitudinal/tab/"
) {

  path <- here::here(paste0(root[1], file))

  cat(crayon::yellow("Reading LFS 2012:\n"))

  # Progress label -> file name. The order matters: the position of each entry
  # is what gets stored in the `quarter` column.
  quarter_files <- c(
    "\tJan-Mar 2012 to Jan-Mar 2013" = "/five_q_longitudinal_jm12-jm13_eul_lgwt17.tab",
    "\tApr-Jun 2012 to Apr-Jun 2013" = "/five_q_longitudinal_aj12-aj13_eul_lgwt17.tab",
    "\tJul-Sep 2012 to Jul-Sep 2013" = "/five_q_longitudinal_js12-js13_eul_lgwt17.tab",
    "\tOct-Dec 2012 to Oct-Dec 2013" = "/five_q_longitudinal_od12-od13_eul_lgwt17.tab"
  )

  # Values in the raw files that are read as missing.
  na_codes <- c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")

  # Variables kept from every quarter, in output column order. The numeric
  # suffix 1-5 indexes the five quarterly interviews.
  keep_vars <- c(
    "persid", "lgwt22",                              # id and weight
    "sex", paste0("age", 1:5),                       # demographics
    paste0("hiqul22d", 1:5),                         # education
    paste0("ilodefr", 1:5), paste0("incac05", 1:5),  # employment status
    paste0("ttushr", 1:5), paste0("grsswk", 1:5)     # work
  )

  clean_list <- vector("list", length(quarter_files))

  ##### read and clean each quarter in turn; trimming the columns straight
  ##### after the read avoids holding four full-width files in memory at once
  for (q in seq_along(quarter_files)) {

    cat(crayon::green(names(quarter_files)[q]))
    quarter_data <- data.table::fread(
      paste0(path, quarter_files[[q]]),
      showProgress = FALSE,
      na.strings = na_codes
    )
    cat(crayon::yellow("\tdone\n"))

    data.table::setnames(
      quarter_data, names(quarter_data), tolower(names(quarter_data))
    )

    # NOTE(review): presumably renamed so the output uses the same variable
    # names as the readers for later years - confirm.
    data.table::setnames(
      quarter_data,
      c("lgwt17", paste0("hiqul11d", 1:5)),
      c("lgwt22", paste0("hiqul22d", 1:5))
    )

    quarter_data <- quarter_data[, keep_vars, with = FALSE]

    quarter_data$quarter <- q
    quarter_data$year <- 2012

    clean_list[[q]] <- quarter_data
  }

  ### combine quarters into a single data table
  lfs_data <- data.table::rbindlist(clean_list, use.names = TRUE, fill = TRUE)

  ### generate missing values for variables not in this year
  ### (five columns per stem, suffixes 1 to 5, added in the order listed)
  placeholder_vars <- unlist(lapply(
    c("etukeul", "govtof2", "disea", "illoff", "clims14", "benfts", "ooben"),
    paste0, 1:5
  ))
  lfs_data[, (placeholder_vars) := NA]

  return(lfs_data)
}
156 changes: 156 additions & 0 deletions R/lfs_read_5q_2013.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Read LFS 2013 longitudinal
#'
#' Reads and performs basic cleaning on the Labour Force Survey five-quarter
#' longitudinal data with a 1st wave that began in calendar year 2013.
#'
#' Four tab-delimited files are read, one for each starting quarter of 2013.
#' For every file the column names are lower-cased, the weight `lgwt17` and
#' the qualification variables `hiqul11d1`-`hiqul11d5` are renamed to `lgwt22`
#' and `hiqul22d1`-`hiqul22d5`, and only a fixed set of id, weight,
#' demographic, education, employment-status and work variables is kept. The
#' four quarters are then stacked and all-`NA` placeholder columns are added
#' for the variables that are not read for this year.
#'
#' @param root Character - the root directory. Only the first element is used.
#' @param file Character - the path, relative to `root`, of the directory that
#' contains the five-quarter longitudinal `.tab` files.
#'
#' @return Returns a data table holding the four stacked quarters, with the
#' selected variables, `quarter` (position of the file, 1 to 4), `year`
#' (always 2013) and the all-`NA` placeholder columns.
#' @export
lfs_read_5q_2013 <- function(
    root = c("X:/"),
    file = "HAR_PR/PR/LFS/Data/longitudinal/tab/"
) {

  path <- here::here(paste0(root[1], file))

  cat(crayon::yellow("Reading LFS 2013:\n"))

  # Progress label -> file name. The order matters: the position of each entry
  # is what gets stored in the `quarter` column.
  quarter_files <- c(
    "\tJan-Mar 2013 to Jan-Mar 2014" = "/five_q_longitudinal_jm13-jm14_eul_lgwt17.tab",
    "\tApr-Jun 2013 to Apr-Jun 2014" = "/five_q_longitudinal_aj13-aj14_eul_lgwt17.tab",
    "\tJul-Sep 2013 to Jul-Sep 2014" = "/five_q_longitudinal_js13-js14_eul_lgwt17.tab",
    "\tOct-Dec 2013 to Oct-Dec 2014" = "/five_q_longitudinal_od13-od14_eul_lgwt17.tab"
  )

  # Values in the raw files that are read as missing.
  na_codes <- c("NA", "", "-1", "-2", "-6", "-7", "-8", "-9", "-90", "-90.0", "N/A")

  # Variables kept from every quarter, in output column order. The numeric
  # suffix 1-5 indexes the five quarterly interviews.
  keep_vars <- c(
    "persid", "lgwt22",                              # id and weight
    "sex", paste0("age", 1:5),                       # demographics
    paste0("hiqul22d", 1:5),                         # education
    paste0("ilodefr", 1:5), paste0("incac05", 1:5),  # employment status
    paste0("ttushr", 1:5), paste0("grsswk", 1:5)     # work
  )

  clean_list <- vector("list", length(quarter_files))

  ##### read and clean each quarter in turn; trimming the columns straight
  ##### after the read avoids holding four full-width files in memory at once
  for (q in seq_along(quarter_files)) {

    cat(crayon::green(names(quarter_files)[q]))
    quarter_data <- data.table::fread(
      paste0(path, quarter_files[[q]]),
      showProgress = FALSE,
      na.strings = na_codes
    )
    cat(crayon::yellow("\tdone\n"))

    data.table::setnames(
      quarter_data, names(quarter_data), tolower(names(quarter_data))
    )

    # NOTE(review): presumably renamed so the output uses the same variable
    # names as the readers for later years - confirm.
    data.table::setnames(
      quarter_data,
      c("lgwt17", paste0("hiqul11d", 1:5)),
      c("lgwt22", paste0("hiqul22d", 1:5))
    )

    quarter_data <- quarter_data[, keep_vars, with = FALSE]

    quarter_data$quarter <- q
    quarter_data$year <- 2013

    clean_list[[q]] <- quarter_data
  }

  ### combine quarters into a single data table
  lfs_data <- data.table::rbindlist(clean_list, use.names = TRUE, fill = TRUE)

  ### generate missing values for variables not in this year
  ### (five columns per stem, suffixes 1 to 5, added in the order listed)
  placeholder_vars <- unlist(lapply(
    c("etukeul", "govtof2", "disea", "illoff", "clims14", "benfts", "ooben"),
    paste0, 1:5
  ))
  lfs_data[, (placeholder_vars) := NA]

  return(lfs_data)
}
Loading

0 comments on commit 3085583

Please sign in to comment.