Skip to content

Commit

Permalink
add identifiers to longitudinal data and fix employment variable bug
Browse files Browse the repository at this point in the history
  • Loading branch information
djmorris1989 committed Sep 17, 2024
1 parent ac01b92 commit eed6901
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 16 deletions.
10 changes: 5 additions & 5 deletions R/lfs_clean_global_5q.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ lfs_clean_global_5q <- function(data,
data[, empstat3cat5 := dplyr::case_match(incac055, c(1:4) ~ "employed", c(5) ~ "unemployed", c(6:33) ~ "inactive")]

data[, empstat8cat1 := dplyr::case_match(incac051, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat2 := dplyr::case_match(incac051, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat3 := dplyr::case_match(incac051, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat4 := dplyr::case_match(incac051, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat5 := dplyr::case_match(incac051, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat2 := dplyr::case_match(incac052, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat3 := dplyr::case_match(incac053, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat4 := dplyr::case_match(incac054, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]
data[, empstat8cat5 := dplyr::case_match(incac055, c(1,3,4) ~ "employed", c(2) ~ "self_employed", c(6,13,24) ~ "education", c(5) ~ "unemployed", c(8:9,15:16,26:27) ~ "sick", c(7,14,25) ~ "caring", c(20,31) ~ "retired", c(10:11,17:19,21:23,28:30,32:34) ~ "other")]

data[, empstat2cat1 := as.factor(empstat2cat1)]
data[, empstat2cat2 := as.factor(empstat2cat2)]
Expand Down Expand Up @@ -145,7 +145,7 @@ lfs_clean_global_5q <- function(data,
#################################
### drop variables that are not needed and retain only those listed below

data <- data[, c("persid", "lgwt", "sex", "quarter", "month", "year", "empl_sequence",
data <- data[, c("id", "persid", "lgwt", "sex", "quarter", "month", "year", "empl_sequence",
"age1", "age2", "age3", "age4", "age5",
"hiqual1", "hiqual2", "hiqual3", "hiqual4", "hiqual5",
"disab1", "disab2", "disab3", "disab4", "disab5",
Expand Down
9 changes: 8 additions & 1 deletion R/lfs_read_5q_2012.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,18 @@ lfs_read_5q_2012 <- function(

###### group data tables into a list and initialize a list to store cleaned data tables in

## set the month for each quarter and generate row identifiers

data.q1[, month := 2]
data.q2[, month := 5]
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2012") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2012") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2012") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2012") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +82,7 @@ lfs_read_5q_2012 <- function(
c("lgwt17","hiqul11d1","hiqul11d2","hiqul11d3","hiqul11d4","hiqul11d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2013.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2013 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2013") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2013") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2013") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2013") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2013 <- function(
c("lgwt17","hiqul11d1","hiqul11d2","hiqul11d3","hiqul11d4","hiqul11d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2014.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2014 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2014") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2014") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2014") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2014") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2014 <- function(
c("lgwt17","hiqul11d1","hiqul11d2","hiqul11d3","hiqul11d4","hiqul15d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2015.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2015 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2015") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2015") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2015") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2015") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2015 <- function(
c("lgwt17","hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2016.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2016 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2016") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2016") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2016") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2016") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2016 <- function(
c("lgwt17","hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2017.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2017 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2017") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2017") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2017") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2017") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2017 <- function(
c("lgwt17","hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2018 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2018") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2018") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2018") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2018") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2018 <- function(
c("lgwt17","hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("lgwt22","hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2019.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2019 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2019") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2019") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2019") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2019") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -77,7 +82,7 @@ lfs_read_5q_2019 <- function(
c("hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5)
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2020.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2020 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2020") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2020") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2020") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2020") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2020 <- function(
c("hiul15d1","hiul15d2","hiul15d3","hiul15d4","hiul15d5"),
c("hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4","hiqul22d5"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5,
Expand Down
7 changes: 6 additions & 1 deletion R/lfs_read_5q_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ lfs_read_5q_2021 <- function(
data.q3[, month := 8]
data.q4[, month := 11]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2021") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2021") ]
data.q3[, id := paste0(1:nrow(data.q3),"-3-2021") ]
data.q4[, id := paste0(1:nrow(data.q4),"-4-2021") ]

data.list <- list(data.q1, data.q2, data.q3, data.q4)

clean.data.list <- list()
Expand All @@ -75,7 +80,7 @@ lfs_read_5q_2021 <- function(
c("hiul15d1","hiul15d2","hiul15d3","hiul15d4"),
c("hiqul22d1","hiqul22d2","hiqul22d3","hiqul22d4"))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5,
Expand Down
5 changes: 4 additions & 1 deletion R/lfs_read_5q_2022.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ lfs_read_5q_2022 <- function(
data.q1[, month := 2]
data.q2[, month := 5]

data.q1[, id := paste0(1:nrow(data.q1),"-1-2022") ]
data.q2[, id := paste0(1:nrow(data.q2),"-2-2022") ]

data.list <- list(data.q1,data.q2)

clean.data.list <- list()
Expand All @@ -51,7 +54,7 @@ lfs_read_5q_2022 <- function(

setnames(data, names(data), tolower(names(data)))

id_weights_vars <- Hmisc::Cs(persid, lgwt22, month)
id_weights_vars <- Hmisc::Cs(id, persid, lgwt22, month)

demographic_vars <- Hmisc::Cs(sex,
age1, age2, age3, age4, age5,
Expand Down

0 comments on commit eed6901

Please sign in to comment.