Skip to content

Commit

Permalink
Merge pull request #63 from ncn-foreigners/dev
Browse files Browse the repository at this point in the history
merge from DEV before CRAN submission
  • Loading branch information
LukaszChrostowski authored Jan 11, 2025
2 parents d98bc0f + e02d28d commit 14e9dc7
Show file tree
Hide file tree
Showing 42 changed files with 2,898 additions and 2,631 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ Imports:
nleqslv,
doParallel,
foreach,
parallel
parallel,
formula.tools
LinkingTo:
Rcpp,
RcppArmadillo
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ S3method(deviance,nonprobsvy)
S3method(hatvalues,nonprobsvy)
S3method(logLik,nonprobsvy)
S3method(nobs,nonprobsvy)
S3method(nonprobsvycheck,nonprobsvy)
S3method(pop.size,nonprobsvy)
S3method(print,nonprobsvy)
S3method(print,nonprobsvycheck)
S3method(print,summary_nonprobsvy)
S3method(residuals,nonprobsvy)
S3method(summary,nonprobsvy)
Expand All @@ -21,6 +23,7 @@ export(controlSel)
export(genSimData)
export(logit_model_nonprobsvy)
export(nonprob)
export(nonprobsvycheck)
export(pop.size)
export(probit_model_nonprobsvy)
import(Rcpp)
Expand All @@ -34,13 +37,16 @@ importFrom(Rcpp,sourceCpp)
importFrom(doParallel,registerDoParallel)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(formula.tools,lhs.vars)
importFrom(formula.tools,rhs.vars)
importFrom(maxLik,maxLik)
importFrom(ncvreg,cv.ncvreg)
importFrom(nleqslv,nleqslv)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,aggregate)
importFrom(stats,as.formula)
importFrom(stats,binomial)
importFrom(stats,coef)
Expand All @@ -63,11 +69,13 @@ importFrom(stats,model.frame)
importFrom(stats,model.matrix)
importFrom(stats,model.response)
importFrom(stats,nobs)
importFrom(stats,plogis)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,predict.glm)
importFrom(stats,printCoefmat)
importFrom(stats,pt)
importFrom(stats,qlogis)
importFrom(stats,qnorm)
importFrom(stats,rbinom)
importFrom(stats,rchisq)
Expand All @@ -77,6 +85,7 @@ importFrom(stats,rexp)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,summary.glm)
importFrom(stats,terms)
importFrom(stats,uniroot)
Expand All @@ -85,6 +94,7 @@ importFrom(stats,var)
importFrom(stats,vcov)
importFrom(stats,weighted.mean)
importFrom(survey,as.svrepdesign)
importFrom(survey,svyby)
importFrom(survey,svymean)
importFrom(survey,svyrecvar)
importFrom(survey,svytotal)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

### Features
- two additional datasets have been included: `jvs` (Job Vacancy Survey; a probability sample survey) and `admin` (Central Job Offers Database; a non-probability sample survey). The units and auxiliary variables have been aligned in a way that allows the data to be integrated using the methods implemented in this package.
- a `nonprobsvycheck` function was added to check the balance in the totals of the variables based on the weighted weights between the non-probability and probability samples.

### Bugfixes
- basic methods and functions related to variance estimation, weights and probability linking methods have been rewritten in a more optimal and readable way.

# nonprobsvy 0.1.1

Expand Down
31 changes: 0 additions & 31 deletions R/beta_funcs.R

This file was deleted.

44 changes: 0 additions & 44 deletions R/bias_correction_ipw.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,50 +32,6 @@ mm <- function(X,
par0 <- start
}
prior_weights <- c(weights_rand, weights)

# MI - bias correction #########
# multiroot <- nleqslv::nleqslv( # TODO to fix "Jacobian is completely unusable (all zero entries?)"
# x = rep(0, p), # TODO add user-specified parameters to control functions
# fn = u_beta_mi,# TODO algorithm did not converge in maxit iterations for cloglog
# R = R,
# X = X,
# y = y,
# weights = prior_weights,
# family_nonprobsvy = family
# )
# print(multiroot$x)
##########

# IPW - bias correction #########
# multiroot <- nleqslv::nleqslv(
# x = rep(0, p), # TODO add user-specified parameters to control functions
# fn = u_theta_ipw,
# method = "Newton", # TODO consider the method Broyden
# global = "qline", # c("dbldog", "pwldog", cline", "qline", "gline", "hook", "none")
# xscalm = "fixed", # c("fixed","auto")
# jacobian = TRUE,
# control = list(scalex = rep(1, length(rep(0, p)))), # TODO algorithm did not converge in maxit iterations for cloglog
# R = R,
# X = X,
# y = y,
# weights = weights,
# method_selection = method_selection
# )
# print(multiroot$x)
##########

######### BB
# multiroot <- nleqslv::nleqslv(
# par = par0, # TODO add user-specified parameters to control functions
# fn = u_theta_beta_dr,
# R = R,
# X = X,
# y = y,
# weights = prior_weights,
# method_selection = method_selection,
# family_nonprobsvy = family
# )
# par_sel <- multiroot$par
######### NLESQLV
multiroot <- nleqslv::nleqslv(
x = par0,
Expand Down
17 changes: 0 additions & 17 deletions R/boot_ipw.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,14 +280,6 @@ bootIPW_multicore <- function(X_rand,
ps_nons <- est_method_obj$ps_nons
weights_nons <- 1 / ps_nons
N_est_nons <- ifelse(is.null(pop_size), sum(weights[strap_nons] * weights_nons), pop_size)

# mu_hat_boot <- mu_hatIPW(
# y = y[strap_nons],
# weights = weights[strap_nons],
# weights_nons = weights_nons,
# N = N_est_nons
# ) # IPW estimator

mu_hats_this_boot <- numeric(mu_len)

for (l in 1:mu_len) {
Expand Down Expand Up @@ -321,13 +313,6 @@ bootIPW_multicore <- function(X_rand,

weights_nons <- 1 / ps_nons
N_est_nons <- ifelse(is.null(pop_size), sum(weights_strap * weights_nons), pop_size)

# mu_hat_boot <- mu_hatIPW(
# y = y[strap],
# weights = weights_strap,
# weights_nons = weights_nons,
# N = N_est_nons
# ) # IPW estimator
for (l in 1:mu_len) {
mu_hats_boot[k, l] <- mu_hatIPW(
y = ys[[l]][strap],
Expand All @@ -341,8 +326,6 @@ bootIPW_multicore <- function(X_rand,
}
)
mu_hats_boot <- matrix(mu_hats_boot, nrow = num_boot, ncol = mu_len, byrow = TRUE)
# mu_hats_boot_means <- colMeans(mu_hats_boot)
# boot_var <- 1 / (num_boot - 1) * sum((mu_hats - mu_hat_boot)^2)
for (l in 1:mu_len) {
boot_vars[l] <- 1 / (num_boot - 1) * sum((mu_hats_boot[, l] - mu_hats[l])^2)
}
Expand Down
38 changes: 0 additions & 38 deletions R/boot_mi.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,6 @@ bootMI <- function(X_rand,
weights_strap <- weights[strap]
X_nons_strap <- X_nons[strap, , drop = FALSE]
y_strap <- y[strap]

# strap_rand <- sample.int(replace = TRUE, n = n_rand, prob = 1/weights_rand)
# weights_rand_strap <- weights_rand[strap_rand]
# X_rand_strap <- X_rand[strap_rand, , drop = FALSE]
# N_strap <- sum(weights_rand_strap)

# using svy package
strap_rand_svy <- which(rep_weights[, k] != 0)
weights_rand_strap_svy <- rep_weights[, k] * weights_rand
Expand Down Expand Up @@ -276,8 +270,6 @@ bootMI <- function(X_rand,
mu_hat_boot <- as.vector(y_strap_rand)
mu_hats[k] <- mu_hat_boot
if (verbose) {
# info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
# print(info)
setTxtProgressBar(pb, k)
}
k <- k + 1
Expand Down Expand Up @@ -309,8 +301,6 @@ bootMI <- function(X_rand,
mu_hat_boot <- mean(y_strap[model_rand$nn.idx])
mu_hats[k] <- mu_hat_boot
if (verbose) {
# info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
# print(info)
utils::setTxtProgressBar(pb, k)
}
k <- k + 1
Expand Down Expand Up @@ -367,21 +357,9 @@ bootMI <- function(X_rand,
)
}
)
#
# model_rand <- nonprobMI_nn(
# data = y_strap_nons,
# query = y_strap_rand,
# k = control_outcome$k,
# treetype = control_outcome$treetype,
# searchtype = control_outcome$searchtype
# )


mu_hat_boot <- mean(y_strap[model_rand$nn.idx])
mu_hats[k] <- mu_hat_boot
if (verbose) {
# info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
# print(info)
utils::setTxtProgressBar(pb, k)
}
k <- k + 1
Expand All @@ -396,7 +374,6 @@ bootMI <- function(X_rand,
}
}
}
# mu_hat_boot <- mean(mu_hats)
if (method == "pmm") {
if (nn_exact_se) {
comp2 <- pmm_exact(pi_ij,
Expand Down Expand Up @@ -499,7 +476,6 @@ bootMI_multicore <- function(X_rand,
strap_rand_svy <- which(rep_weights[, k] != 0)
weights_rand_strap_svy <- rep_weights[, k] * weights_rand
N_strap <- sum(weights_rand_strap_svy)
# X_rand_strap <- X_rand[which(rep_weights[,k] != 0),]

model_strap <- stats::glm.fit(
x = X_nons_strap,
Expand All @@ -525,11 +501,6 @@ bootMI_multicore <- function(X_rand,
X_nons_strap <- X_nons[strap, , drop = FALSE]
y_strap <- y[strap]

# strap_rand <- sample.int(replace = TRUE, n = n_rand, prob = 1/weights_rand)
# weights_rand_strap <- weights_rand[strap_rand]
# X_rand_strap <- X_rand[strap_rand, , drop = FALSE]
# N_strap <- sum(weights_rand_strap)

# using svy package
strap_rand_svy <- which(rep_weights[, k] != 0)
weights_rand_strap_svy <- rep_weights[, k] * weights_rand
Expand All @@ -546,7 +517,6 @@ bootMI_multicore <- function(X_rand,
)
y_rand_strap <- apply(model_rand$nn.idx, 1,
FUN = \(x) mean(y_strap[x])
# FUN=\(x) mean(sample_nonprob$short_[x])
)
weighted.mean(x = y_rand_strap, w = weights_strap_rand)
}
Expand All @@ -561,11 +531,6 @@ bootMI_multicore <- function(X_rand,
X_nons_strap <- X_nons[strap, , drop = FALSE]
y_strap <- y[strap]

# strap_rand <- sample.int(replace = TRUE, n = n_rand, prob = 1/weights_rand)
# weights_rand_strap <- weights_rand[strap_rand]
# X_rand_strap <- X_rand[strap_rand, , drop = FALSE]
# N_strap <- sum(weights_rand_strap)

# using svy package
strap_rand_svy <- which(rep_weights[, k] != 0)
weights_rand_strap_svy <- rep_weights[, k] * weights_rand
Expand Down Expand Up @@ -612,7 +577,6 @@ bootMI_multicore <- function(X_rand,

y_rand_strap <- apply(model_rand$nn.idx, 1,
FUN = \(x) mean(y_strap[x])
# FUN=\(x) mean(sample_nonprob$short_[x])
)
weighted.mean(x = y_rand_strap, w = weights_strap_rand)
}
Expand Down Expand Up @@ -640,8 +604,6 @@ bootMI_multicore <- function(X_rand,
beta <- model_strap$coefficients
eta <- pop_totals %*% beta / N
y_strap_rand <- family_nonprobsvy$linkinv(eta)

# mu_hat_boot <- mu_hatMI(ystrap_rand, weights_rand_strap_svy, N_strap)
as.vector(y_strap_rand)
}
)
Expand Down
Loading

0 comments on commit 14e9dc7

Please sign in to comment.