From 9bd5ee8b58e3229256f4bc7c20420328ab11718f Mon Sep 17 00:00:00 2001 From: amy Date: Mon, 13 Apr 2026 15:53:28 -0700 Subject: [PATCH 1/5] reduce some tests; remove some files --- R/asymmetric_hat.R | 63 ------------------------- R/eta_jacobian.R | 37 --------------- R/expected_info_repar.R | 52 -------------------- R/update_data.R | 49 ------------------- tests/testthat/test-fit_null_discrete.R | 8 ++-- 5 files changed, 4 insertions(+), 205 deletions(-) delete mode 100644 R/asymmetric_hat.R delete mode 100644 R/eta_jacobian.R delete mode 100644 R/expected_info_repar.R delete mode 100644 R/update_data.R diff --git a/R/asymmetric_hat.R b/R/asymmetric_hat.R deleted file mode 100644 index 8c908ff..0000000 --- a/R/asymmetric_hat.R +++ /dev/null @@ -1,63 +0,0 @@ - -#this function creates the "asymmetric hat" matrix used for -#creating data augmentations as described in Kosmidis & Firth (2011) - -asymmetric_hat <- function(beta_tilde_J, #beta expressed as a vector - tau, #column sums of Y (without data augmentations) - X_tilde_J, #expanded design matrix (for Y as a single vector) - J, #number of taxa - n, #number of samples - p, #number of predictors, incl. intercept - collect_jacobians = TRUE, - verbose= FALSE){ - - #get the expected information matrix - expected_info_list <- expected_info_repar(beta_tilde_J, - tau, - X_tilde_J, - J, - n, - p, - verbose = verbose) - expected_info <- expected_info_list$expected_info - # expected_info_eigen <- eigen(expected_info) - - #eigendecompose expected information after removing J-th taxon's parameters - #(implicitly imposing B^J = 0) - expected_info_eigen <- eigen(expected_info[1:(p*(J - 1)),1:(p*(J - 1))]) - - # get info^(-0.5) using eigendecomposition - expected_info_half_inv <- expected_info_eigen$vectors %*% - diag(sapply( expected_info_eigen$values,function(x) ifelse(x>0,1/sqrt(x),0))) %*% - t(expected_info_eigen$vectors) - - # now we add on information in tau - tau_start <- p*(J - 1) + 1 #index where tau starts - tau_end <- nrow(expected_info) #index where tau ends - #expected info is then block diagonal with one block for beta and one for tau: - expected_info_half_inv <- Matrix::bdiag(expected_info_half_inv, - Matrix::Diagonal( - x = 1/sqrt(diag(expected_info)[tau_start:tau_end]))) - - #compute augmentations - augmentations <- matrix(0,nrow = n, ncol = J) - - for(i in 1:n){ - if(verbose){ - message("Computing data augmentation for observation ",i,".") - } - which_indices <- 1:J + (i - 1)*J #hmm maybe this isn't used... - jacobian_i <- expected_info_list$jacobians[[i]] #jacobian (d[mean]/d(beta,tau), I believe) - mu_i <- expected_info_list$mus[[i]] # means for i-th observation - postmult_jacobian_i <- jacobian_i%*% expected_info_half_inv #working toward asymmetric hat - - #entries of asymmetric hat matrix for observation i: - H_i <- Matrix::tcrossprod(postmult_jacobian_i)%*%diag(as.numeric(mu_i)) - #diagonals are data augmentations: - augmentations[i,] <- diag(as.matrix(H_i))[1:J]/2 - } - - return(augmentations) - - -} diff --git a/R/eta_jacobian.R b/R/eta_jacobian.R deleted file mode 100644 index 4363f29..0000000 --- a/R/eta_jacobian.R +++ /dev/null @@ -1,37 +0,0 @@ - -#function to get jacobian of mean function for observation i wrt beta, tau -#used in getting data augmentations -eta_jacobian <- function(beta_tilde_J, #long format beta (w.o. J-th taxon, I believe) - tau, #colsums of Y (no augmentations) - X_tilde_J, #expanded design matrix (for Y_vec) - J, #number taxa - i, #observation index - n #number of observations - ){ - - which_indices <- 1:J + (i - 1)*J - p_i <- exp(X_tilde_J[which_indices,]%*%beta_tilde_J) - p_i <- p_i/sum(p_i) - premult_mat <- Matrix::Matrix(1,nrow = J, ncol = 1)%*%Matrix::t(p_i) - premult_mat <- Matrix::Diagonal(x = rep(1,J)) - premult_mat - d_eta_i_d_beta_tilde <- premult_mat%*%X_tilde_J[which_indices,] - # d_eta_i_d_beta_tilde <- as(d_eta_i_d_beta_tilde,"sparseMatrix") - # e_i <- Matrix(0,nrow = 1, ncol = n) - # e_i[1,i] <- 1 - # d_eta_i_d_tau <- Matrix::Matrix(0,nrow = J, ncol = n) - # d_eta_i_d_tau[,i] <- 1/tau[i] - - d_eta_i_d_tau <- Matrix::sparseMatrix(i = 1:J, - j = rep(i, J), - x = rep(1/tau[i],J), - dims = c(J, n)) - - jacobian <- cbind(d_eta_i_d_beta_tilde, d_eta_i_d_tau) - # jacobian <- list() - # jacobian <- list("dbeta" = d_eta_i_d_beta_tilde, - # "dtau" = d_eta_i_d_tau) - - - - return(jacobian) -} diff --git a/R/expected_info_repar.R b/R/expected_info_repar.R deleted file mode 100644 index 90392da..0000000 --- a/R/expected_info_repar.R +++ /dev/null @@ -1,52 +0,0 @@ - -#another function to get expected information (and possibly jacobians of -#mean function in parameters) -expected_info_repar <- function(beta_tilde_J, - tau, - X_tilde_J, - J, - n, - p, - collect_byproducts = TRUE, - verbose = FALSE){ - - expected_info <- Matrix::Matrix(0,nrow = n + p*(J - 1), ncol = n + p*(J - 1)) - - if(collect_byproducts){ - jacobians <- vector(n, mode = "list") - mus <- vector(n,mode = "list") - } else{ - jacobians <- NULL - mus <- NULL - } - # info_bits <- vector(n,mode = "list") - - # pv5 <- profvis({ - for(i in 1:n){ - if(verbose){ - message("Computing information contribution of observation ", i,".") - } - - which_indices <- 1:J + (i - 1)*J - d_eta_i_d_theta_J <- eta_jacobian(beta_tilde_J, - tau, - X_tilde_J, - J, - i, - n) - if(collect_byproducts){ - jacobians[[i]] <- d_eta_i_d_theta_J - } - mu_i <- exp(X_tilde_J[which_indices,]%*%beta_tilde_J + log(tau[i]) - - log(sum(exp(X_tilde_J[which_indices,]%*%beta_tilde_J )))) - mus[[i]] <- mu_i - expected_info <- expected_info + - Matrix::crossprod(d_eta_i_d_theta_J,Matrix::crossprod(Matrix::Diagonal(x = as.numeric(mu_i)),d_eta_i_d_theta_J)) - # Matrix::t(d_eta_i_d_theta_J)%*%diag(as.numeric(mu_i)) %*% - # d_eta_i_d_theta_J - } - # }) - return(list("expected_info" = expected_info, - "jacobians" = jacobians, - "mus" = mus)) -} diff --git a/R/update_data.R b/R/update_data.R deleted file mode 100644 index 5e9c1d6..0000000 --- a/R/update_data.R +++ /dev/null @@ -1,49 +0,0 @@ - - -update_data <- function(Y, - X_tilde, - B, - p, - n, - J, - verbose = FALSE){ - X_tilde_J <- X_tilde - indexes_to_zero <- numeric(0) - for(i in 1:n){ - indexes_to_zero <- c(indexes_to_zero,(i - 1)*J + J) - } - X_tilde_J[indexes_to_zero,] <- 0 - # for(i in 1:n){ - # X_tilde_J[(i - 1)*J + J,] <- 0#*X_tilde_J[(i - 1)*J + J,] - # } - X_tilde_J <- X_tilde_J[,1:(p*(J - 1))] - B_J <- B - for(k in 1:p){ - B_J[k,] <- B_J[k,] - B_J[k,J] - } - beta_tilde_J <- B_cup_from_B(B_J[,1:(J - 1),drop= FALSE]) - - tau <- rowSums(Y) - if(sum(tau ==0)>0){ - tau[tau == 0] <- 0.1 - warning("At least one observation has total count equal to zero!") - } - # sapply(1:n, - # function(i) - # log(sum(Y[i,])) - - # log(sum(exp(X_tilde_J[(i - 1)*J + 1:J,]%*%beta_tilde_J)))) - - - - augmentations <- - asymmetric_hat(beta_tilde_J = beta_tilde_J, - tau = tau, - X_tilde_J = X_tilde_J, - J = J, - n = n, - p = p, - verbose = verbose) - - return(Y + augmentations) -} - diff --git a/tests/testthat/test-fit_null_discrete.R b/tests/testthat/test-fit_null_discrete.R index 61b32d9..717bf57 100644 --- a/tests/testthat/test-fit_null_discrete.R +++ b/tests/testthat/test-fit_null_discrete.R @@ -2,8 +2,8 @@ test_that("new discrete is correct", { # set.seed(1) - n <- 40 - J <- 30 + n <- 20 + J <- 15 p <- 5 beta <- matrix(rnorm(p*J, mean = 2, sd=1), ncol = J) X <- cbind(1, @@ -116,7 +116,7 @@ test_that("new discrete is correct with flexible jref and jstar", { test_that("new discrete is fast", { - #skip("Too slow for automated testing; 20 mins on laptop") + skip("Too slow; just checks fit_null_discrete faster than fit_null.") set.seed(2) n <- 60 @@ -174,7 +174,7 @@ test_that("new discrete is fast", { test_that("new discrete null aligns with older code", { - skip("Too slow for automated testing; ~3 mins on laptop") + skip("Too slow. ~3 mins on laptop. Just checks alignment between versions.") set.seed(4) n <- 12 From 89939228c147dd6b49ba874b7fc3d03b16729d8c Mon Sep 17 00:00:00 2001 From: amy Date: Mon, 13 Apr 2026 16:25:56 -0700 Subject: [PATCH 2/5] remove some redundancies --- DESCRIPTION | 2 +- tests/testthat/test-emuFit.R | 153 ++++++------------------ tests/testthat/test-fit_null_discrete.R | 2 +- 3 files changed, 37 insertions(+), 120 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 71fe3e2..787d3f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: radEmu Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance -Version: 2.3.1.0 +Version: 2.3.1.1 Authors@R: c(person("David", "Clausen", role = c("aut")), person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")), person("Sarah", "Teichman", role = "aut")) diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R index ffb2426..edfa5d4 100644 --- a/tests/testthat/test-emuFit.R +++ b/tests/testthat/test-emuFit.R @@ -1,5 +1,5 @@ set.seed(11) -J <- 6 +J <- 4 n <- 12 X <- cbind(1,rnorm(n)) b0 <- rnorm(J) @@ -33,16 +33,13 @@ test_that("emuFit takes formulas and actually fits a model", { verbose = FALSE, B_null_tol = 1e-2, tolerance = 0.01, - return_wald_p = FALSE, + return_wald_p = TRUE, compute_cis = TRUE, run_score_tests = TRUE, - test_kj = data.frame(k = 2, j = 1:6)) + test_kj = data.frame(k = 2, j = 1:J)) }) - - - ## emuFit takes formulas and actually fits a model (with score tests) - + expect_true(all(fitted_model$coef$pval != fitted_model$coef$wald_p)) expect_true(all(fitted_model$coef$wald_p>0 & fitted_model$coef$wald_p<1)) expect_true(all(fitted_model$coef$pval>0 & fitted_model$coef$pval<1)) expect_true(inherits(fitted_model$B,"matrix")) @@ -60,8 +57,7 @@ test_that("emuFit takes formulas and actually fits a model", { expect_identical(fitted_model$coef$estimate, second_model$coef$estimate) - ## emuFit takes formulas and actually fits a model (with score tests using full model info) - + ## changing whether we use the full model changes the p-values fitted_model_use_fullmodel_info <- emuFit(Y = Y, X = X, formula = ~group, @@ -69,68 +65,19 @@ test_that("emuFit takes formulas and actually fits a model", { tolerance = 0.01, data = covariates, run_score_tests= TRUE, - return_wald_p = TRUE, ### diff + return_wald_p = TRUE, control = list(use_fullmodel_info = TRUE), ### diff - verbose = FALSE, - test_kj = data.frame(k = 2, j = 1:6)) - - - expect_true(all(fitted_model_use_fullmodel_info$coef$wald_p>0 & fitted_model_use_fullmodel_info$coef$wald_p<1)) - expect_true(inherits(fitted_model_use_fullmodel_info$B,"matrix")) - expect_true(inherits(fitted_model_use_fullmodel_info$Y_augmented,"matrix")) - expect_true(all(fitted_model_use_fullmodel_info$coef$pval>0 & fitted_model_use_fullmodel_info$coef$pval<1)) - - ## if return_both_score_pvals = TRUE, emuFit runs and returns two non-identical p-values - - ### TODO test this in a smaller example - - fitted_model_both <- emuFit(Y = Y, - X = X, - formula = ~group, - data = covariates, - run_score_tests= TRUE, - return_wald_p = TRUE, - verbose = FALSE, - test_kj = data.frame(k = 2, j = 1:6), - control = list(tau = 1.2, - use_fullmodel_info = TRUE, - return_both_score_pvals = TRUE)) - - ps_full <- fitted_model_both$coef$score_pval_full_info - ps_null <- fitted_model_both$coef$score_pval_null_info - expect_true(is.numeric(ps_full)) - expect_true(is.numeric(ps_null)) - expect_true(cor(ps_null, ps_full) > 0.95) - expect_true(cor(ps_null, ps_full) < 1) - - # ## if return_both_score_pvals = TRUE, we - # ## get same p-values as if we separately run emuFit with use_fullmodel_info = TRUE and = FALSE" - # - # # fitted_model had use_fullmodel_info = FALSE - # - # fitted_full <- emuFit(Y = Y, - # X = X, - # formula = ~group, - # tau = 1.2, - # data = covariates, - # run_score_tests= TRUE, - # return_wald_p = TRUE, - # use_fullmodel_info = TRUE, - # verbose = FALSE, - # return_both_score_pvals = FALSE) - # - # expect_true(max(abs(fitted_model_both$coef$score_pval_full_info - fitted_full$coef$pval))==0) - # expect_true(max(abs(fitted_model_both$coef$score_pval_null_info - fitted_model$coef$pval))==0) - # - # ## Difference between using full and null model info is not extreme - # expect_true(cor(fitted_full$coef$pval, fitted_model$coef$pval) < 1) - # expect_true(cor(fitted_full$coef$pval, fitted_model$coef$pval) > 0.95) + verbose = FALSE, + test_kj = data.frame(k = 2, j = 1:J)) + + expect_true(all(fitted_model_use_fullmodel_info$coef$pval != fitted_model$coef$pval)) + expect_true(cor(fitted_model_use_fullmodel_info$coef$pval, fitted_model$coef$pval) > 0.95) }) -test_that("emuFit takes cluster argument without breaking ",{ - expect_silent({ - fitted_model_cluster <- emuFit(Y = Y, +test_that("cluster argument returns same estimates but different p-values",{ + expect_silent({ + fitted_model_cluster <- emuFit(Y = Y, X = X, formula = ~group, data = covariates, @@ -141,39 +88,25 @@ test_that("emuFit takes cluster argument without breaking ",{ compute_cis = TRUE, run_score_tests = TRUE, cluster = rep(1:3,each = 4), - test_kj = data.frame(k = 2, j = 1:6)) - }) + test_kj = data.frame(k = 2, j = 1:2)) + }) expect_silent({ fitted_model_nocluster <- emuFit(Y = Y, - X = X, - formula = ~group, - data = covariates, - verbose = FALSE, - B_null_tol = 1e-2, - tolerance = 0.01, - return_wald_p = FALSE, - compute_cis = TRUE, - run_score_tests = TRUE, - test_kj = data.frame(k = 2, j = 1:6)) + X = X, + formula = ~group, + data = covariates, + verbose = FALSE, + B_null_tol = 1e-2, + tolerance = 0.01, + return_wald_p = FALSE, + compute_cis = TRUE, + run_score_tests = TRUE, + test_kj = data.frame(k = 2, j = 1:2)) }) expect_true(all(fitted_model_nocluster$coef$estimate == fitted_model_cluster$coef$estimate)) - expect_true(all(fitted_model_nocluster$coef$se != fitted_model_cluster$coef$se)) - - expect_true(all(fitted_model_cluster$coef$se != fitted_model_nocluster$coef$se)) - - - - - ## emuFit takes formulas and actually fits a model (with score tests) - - expect_true(all(fitted_model_cluster$coef$wald_p>0 & fitted_model_cluster$coef$wald_p<1)) - expect_true(all(fitted_model_cluster$coef$pval>0 & fitted_model_cluster$coef$pval<1)) - expect_true(inherits(fitted_model_cluster$B,"matrix")) - expect_true(inherits(fitted_model_cluster$Y_augmented,"matrix")) - expect_true(cor(fitted_model_cluster$B[2,],b[2, ]) > 0.85) }) @@ -193,14 +126,14 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{ X <- cbind(1,rnorm(12)) covariates <- data.frame(group = X[,2]) - - b1 <- seq(1,5,length.out = 6) + J <- 6 + b1 <- seq(1,5,length.out = J) b1 <- b1 - mean(b1) b1[3:4] <- 0 - Y <- radEmu:::simulate_data(n = 12, J = 6, + Y <- radEmu:::simulate_data(n = 12, J = J, X = X, - b0 = rnorm(6), + b0 = rnorm(J), b1 = b1, distn = "ZINB", zinb_size = 2, @@ -333,8 +266,6 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{ # # # expect_true(all(fitted_model$coef$wald_p >= 0 & fitted_model$coef$wald_p <= 1)) -# expect_true(inherits(fitted_model$B,"matrix")) -# expect_true(inherits(fitted_model$Y_augmented,"matrix")) # expect_true(cor(fitted_model$B[2,],b1)>0.95) # # }) @@ -353,7 +284,7 @@ test_that("emuFit runs without penalty", { return_wald_p = FALSE, compute_cis = TRUE, run_score_tests = TRUE, - test_kj = data.frame(k = 2, j = 1:6)) + test_kj = data.frame(k = 2, j = 1:2)) }) }) @@ -369,23 +300,9 @@ test_that("emuFit runs with just intercept model", { return_wald_p = FALSE, compute_cis = TRUE, run_score_tests = TRUE, - test_kj = data.frame(k = 1, j = 1:6)) - }) - - expect_message({ - fitted_model1 <- emuFit(Y = Y, - X = X[, 1, drop = FALSE], - verbose = FALSE, - B_null_tol = 1e-2, - tolerance = 0.01, - return_wald_p = FALSE, - compute_cis = TRUE, - run_score_tests = TRUE, - test_kj = data.frame(k = 1, j = 1:6)) + test_kj = data.frame(k = 1, j = 1:2)) }) - expect_equal(fitted_model$coef[, 2:9], fitted_model1$coef[, 2:9]) - }) test_that("emuFit has 'score_test_hyperparams' object and throws warnings when convergence isn't hit", { @@ -631,7 +548,7 @@ test_that("emuFit refits starting at provided value if `B` or `fitted_model` are }) test_that("giving test_kj as valid strings works", { - colnames(Y) <- paste0("taxon", 1:6) + colnames(Y) <- paste0("taxon", 1:J) colnames(X) <- c("int", "group") res <- emuFit(Y = Y, X = X, compute_cis = FALSE, test_kj = data.frame(k = "group", j = "taxon3"), penalize = FALSE, tolerance = 0.1) @@ -685,9 +602,9 @@ test_that("multiple constraint functions can be submitted", { res4 <- emuFit(Y = Y, X = X, compute_cis = FALSE, test_kj = data.frame(k = 2, j = 1), penalize = TRUE, tolerance = 0.1, - constraint_fn = list(3, 5), + constraint_fn = list(3, 4), constraint_grad_fn = list(NULL, NULL), control = list(trackB = TRUE)) expect_true(all.equal(res4$B[1, 3], 0)) - expect_true(all.equal(res4$B[2, 5], 0)) + expect_true(all.equal(res4$B[2, 4], 0)) }) diff --git a/tests/testthat/test-fit_null_discrete.R b/tests/testthat/test-fit_null_discrete.R index 717bf57..de9ce5a 100644 --- a/tests/testthat/test-fit_null_discrete.R +++ b/tests/testthat/test-fit_null_discrete.R @@ -2,7 +2,7 @@ test_that("new discrete is correct", { # set.seed(1) - n <- 20 + n <- 16 J <- 15 p <- 5 beta <- matrix(rnorm(p*J, mean = 2, sd=1), ncol = J) From b366c444f3eb394ab4821a101a1bbfe07514d96a Mon Sep 17 00:00:00 2001 From: amy Date: Mon, 13 Apr 2026 16:39:48 -0700 Subject: [PATCH 3/5] reduce micro --- DESCRIPTION | 2 +- tests/testthat/test-emuFit_micro.R | 288 +++++++++++------------------ 2 files changed, 113 insertions(+), 177 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 787d3f2..edd40b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: radEmu Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance -Version: 2.3.1.1 +Version: 2.3.2.0 Authors@R: c(person("David", "Clausen", role = c("aut")), person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")), person("Sarah", "Teichman", role = "aut")) diff --git a/tests/testthat/test-emuFit_micro.R b/tests/testthat/test-emuFit_micro.R index 90c1163..075605c 100644 --- a/tests/testthat/test-emuFit_micro.R +++ b/tests/testthat/test-emuFit_micro.R @@ -1,50 +1,28 @@ -test_that("ML fit to simple example give reasonable output", { +test_that("With or without 'working_constraint' we get same results", { set.seed(4323) - X <- cbind(1,rep(c(0,1),each = 20)) - J <- 10 - n <- 40 - b1 <- 1:J - mean(1:J) + n <- 20 + X <- cbind(1,rep(c(0,1),each = n/2)) + J <- 6 + # b1 <- 1:J - mean(1:J) + b1 <- seq(1,J,length.out = J) + Y <- radEmu:::simulate_data(n = n, J = J, X = X, b0 = rnorm(J), b1 = b1, distn = "Poisson", - mean_z = 10) + mean_z = 5) ml_fit <- emuFit_micro(X = X, Y = Y, constraint_fn = rep(list(function(x) mean(x)), 2), maxit = 200, - tolerance = 1e-3, + tolerance = 1e-6, verbose = FALSE) - - # plot(b1-mean(b1),ml_fit[2,]) - # abline(a = 0,b = 1,lty =2,col = "red") - - expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1)))) < 1) # increased to 1 -}) - -test_that("With or without 'working_constraint' we get same results", { - set.seed(4323) - X <- cbind(1,rep(c(0,1),each = 20)) - J <- 10 - n <- 40 - Y <- radEmu:::simulate_data(n = n, - J = J, - X = X, - b0 = rnorm(J), - b1 = seq(1,10,length.out = J), - distn = "Poisson", - mean_z = 5) - ml_fit <- emuFit_micro(X, - Y, - constraint_fn = rep(list(function(x) mean(x)), 2), - maxit = 200, - tolerance = 1e-6, - verbose= FALSE) + expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1)))) < 1) # increased to 1 ml_fit_direct <- emuFit_micro(X, Y, @@ -53,61 +31,60 @@ test_that("With or without 'working_constraint' we get same results", { use_working_constraint = FALSE, tolerance = 1e-6, verbose = FALSE) - - + + expect_equal(ml_fit,ml_fit_direct,tolerance = 1e-4) }) -test_that("PL fit with categorical predictor matches analytical form of MPLE in this case, - and does NOT match MLE when group sizes are equal", { - set.seed(90333) - X <- cbind(1,rep(c(0,1),each = 20)) - z <- rnorm(40) - J <- 10 - p <- 2 - n <- 40 - b0 <- rnorm(J) - b1 <- seq(1,10,length.out = J)/5 - b <- rbind(b0,b1) - Y <- matrix(NA,ncol = J, nrow = 40) - - for(i in 1:40){ - for(j in 1:J){ - temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i]) - Y[i,j] <- rpois(1, lambda = temp_mean) - } - } - pl_fit <- emuFit_micro_penalized(X, - Y, - B = matrix(rnorm(20),nrow = 2), - constraint_fn = rep(list(function(x) mean(x)), 2), - maxit = 200, - tolerance = 1e-8, - verbose= FALSE) - - ml_fit <- emuFit_micro(X, +test_that("PL fit with categorical predictor matches analytical form of MPLE in this case, and does NOT match MLE when group sizes are equal", { + set.seed(90333) + X <- cbind(1,rep(c(0,1),each = 20)) + z <- rnorm(40) + J <- 10 + p <- 2 + n <- 40 + b0 <- rnorm(J) + b1 <- seq(1,10,length.out = J)/5 + b <- rbind(b0,b1) + Y <- matrix(NA,ncol = J, nrow = 40) + + for(i in 1:40){ + for(j in 1:J){ + temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i]) + Y[i,j] <- rpois(1, lambda = temp_mean) + } + } + pl_fit <- emuFit_micro_penalized(X, Y, B = matrix(rnorm(20),nrow = 2), constraint_fn = rep(list(function(x) mean(x)), 2), maxit = 200, tolerance = 1e-8, verbose= FALSE) - - cs_grp1 <- colSums(Y[1:20,]) - cs_grp2 <- colSums(Y[21:40,]) - - bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5) - bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5) - bhat2 <- bhat2 - bhat1 - bhat1 <- bhat1 - mean(bhat1) - bhat2 <- bhat2 - mean(bhat2) - analytical_B <- rbind(bhat1,bhat2) - - expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7) - expect_true(max(abs(ml_fit - analytical_B))>0.01) - }) + + ml_fit <- emuFit_micro(X, + Y, + B = matrix(rnorm(20),nrow = 2), + constraint_fn = rep(list(function(x) mean(x)), 2), + maxit = 200, + tolerance = 1e-8, + verbose= FALSE) + + cs_grp1 <- colSums(Y[1:20,]) + cs_grp2 <- colSums(Y[21:40,]) + + bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5) + bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5) + bhat2 <- bhat2 - bhat1 + bhat1 <- bhat1 - mean(bhat1) + bhat2 <- bhat2 - mean(bhat2) + analytical_B <- rbind(bhat1,bhat2) + + expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7) + expect_true(max(abs(ml_fit - analytical_B))>0.01) +}) test_that("PL fit with categorical predictor matches analytical form of MPLE in this case, @@ -171,7 +148,7 @@ test_that("We get same results with and without warm start", { b1 = 1:J, distn = "Poisson", mean_z = 2) - + # may need to have large number of iterations and small tolerance ml_fit <- emuFit_micro(X, Y, @@ -187,7 +164,7 @@ test_that("We get same results with and without warm start", { warm_start = FALSE, tolerance = 1e-14, verbose = FALSE) - + expect_equal(ml_fit, ml_fit_direct, tolerance = 1e-6) }) @@ -211,8 +188,8 @@ test_that("We get a fit if we don't specify constraint", { maxit = 200, tolerance = 1e-6, verbose = FALSE) - - + + expect_true(is.matrix(ml_fit)) }) @@ -220,7 +197,7 @@ test_that("ML fit to simple example give reasonable output with J >> n", { set.seed(4323) n <- 10 X <- cbind(1,rep(c(0,1),each = n/2)) - J <- 1000 + J <- 200 b0 <- rnorm(J) b1 <- seq(-5,5,length.out = J) b <- rbind(b0,b1) @@ -232,7 +209,7 @@ test_that("ML fit to simple example give reasonable output with J >> n", { b1 = b1, distn = "Poisson", mean_z = 10) - + ml_fit <- emuFit_micro(X, Y, constraint_fn = rep(list(function(x) mean(x)), 2), @@ -280,101 +257,60 @@ test_that("unpenalized fit uses B if given, and therefore fit is quicker", { }) -test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE", { - set.seed(4323) - J <- 100 - n <- 100 - X <- cbind(1,rnorm(n)) - Y <- radEmu:::simulate_data(n = n, - J = J, - X = X, - b0 = rnorm(J), - b1 = seq(1,5,length.out = J), - distn = "Poisson", - # zinb_size = 2, - # zinb_zero_prop = 0.7, - mean_z = 10) - - start <- proc.time() - pl_fit_one <- emuFit_micro(X, - Y, - B = NULL, - # constraint_fn = function(x) mean(x), - maxit = 10000, - tolerance = 1e-6, - verbose= FALSE, - optimize_rows = FALSE) - end <- proc.time() - start - start_refit <- proc.time() - pl_fit_two <- emuFit_micro(X, - Y, - B = NULL, - # constraint_fn = function(x) mean(x), - maxit = 10000, - tolerance =1e-6, - verbose= FALSE, - optimize_rows = TRUE) - end_refit <- proc.time() - start_refit - # confirm that new approach is faster - expect_true(end_refit[3] < end[3]) - # confirm that two estimates are very similar - expect_true(max(pl_fit_one - pl_fit_two) < 1e-5) - -}) -test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE with large p", { - - skip(message = "Skipping because test is very slow with J = 100 and p = 8") - - set.seed(4323) - J <- 100 - n <- 100 - X <- cbind(1,rnorm(n),rnorm(n), rep(c(0, 1, 0, 0), each = 25), - rep(c(0, 0, 1, 0), each = 25), rep(c(0, 0, 0, 1), each = 25), rnorm(n), - rep(0:1, 50)) - B <- rbind(rnorm(J), seq(1, 5, length.out = J), - rnorm(J), rnorm(J), rnorm(J), rnorm(J), - rnorm(J), rnorm(J)) - for (k in 1:ncol(X)) { - B[k, ] <- B[k, ] - radEmu:::pseudohuber_median(B[k, ], 0.1) - } - Y <- radEmu:::simulate_data(n = n, - J = J, - X = X, - B = B, - distn = "Poisson", - mean_z = 10) - - start <- proc.time() - pl_fit_one <- emuFit_micro(X, - Y, - B = NULL, - maxit = 10000, - tolerance = 1e-6, - verbose= FALSE, - optimize_rows = FALSE) - end <- proc.time() - start - start_refit <- proc.time() - pl_fit_two <- emuFit_micro(X, - Y, - B = NULL, - maxit = 10000, - tolerance =1e-6, - verbose= FALSE, - optimize_rows = TRUE) - end_refit <- proc.time() - start_refit - expect_true(end_refit[3] < end[3]) - max_est_error1 <- max(pl_fit_one - B) - max_est_error2 <- max(pl_fit_two - B) - max_est_diff <- max(pl_fit_one - pl_fit_two) - expect_true(max_est_error1 > max_est_error2) - mean_est_error1 <- mean(sqrt((pl_fit_one - B)^2)) - mean_est_error2 <- mean(sqrt((pl_fit_two - B)^2)) - mean_est_diff <- mean(sqrt((pl_fit_one - pl_fit_two)^2)) - expect_true(mean_est_error1 > mean_est_error2) - expect_true(mean_est_diff < 1e-3) - -}) +# test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE with large p", { +# +# skip(message = "Skipping because test is very slow with J = 100 and p = 8") +# +# set.seed(4323) +# J <- 100 +# n <- 100 +# X <- cbind(1,rnorm(n),rnorm(n), rep(c(0, 1, 0, 0), each = 25), +# rep(c(0, 0, 1, 0), each = 25), rep(c(0, 0, 0, 1), each = 25), rnorm(n), +# rep(0:1, 50)) +# B <- rbind(rnorm(J), seq(1, 5, length.out = J), +# rnorm(J), rnorm(J), rnorm(J), rnorm(J), +# rnorm(J), rnorm(J)) +# for (k in 1:ncol(X)) { +# B[k, ] <- B[k, ] - radEmu:::pseudohuber_median(B[k, ], 0.1) +# } +# Y <- radEmu:::simulate_data(n = n, +# J = J, +# X = X, +# B = B, +# distn = "Poisson", +# mean_z = 10) +# +# start <- proc.time() +# pl_fit_one <- emuFit_micro(X, +# Y, +# B = NULL, +# maxit = 10000, +# tolerance = 1e-6, +# verbose= FALSE, +# optimize_rows = FALSE) +# end <- proc.time() - start +# start_refit <- proc.time() +# pl_fit_two <- emuFit_micro(X, +# Y, +# B = NULL, +# maxit = 10000, +# tolerance =1e-6, +# verbose= FALSE, +# optimize_rows = TRUE) +# end_refit <- proc.time() - start_refit +# expect_true(end_refit[3] < end[3]) +# max_est_error1 <- max(pl_fit_one - B) +# max_est_error2 <- max(pl_fit_two - B) +# max_est_diff <- max(pl_fit_one - pl_fit_two) +# expect_true(max_est_error1 > max_est_error2) +# mean_est_error1 <- mean(sqrt((pl_fit_one - B)^2)) +# mean_est_error2 <- mean(sqrt((pl_fit_two - B)^2)) +# mean_est_diff <- mean(sqrt((pl_fit_one - pl_fit_two)^2)) +# expect_true(mean_est_error1 > mean_est_error2) +# expect_true(mean_est_diff < 1e-3) +# +# }) # # # From 824aea44a35b94479b13b1bf719be5a9ad28ea44 Mon Sep 17 00:00:00 2001 From: amy Date: Mon, 13 Apr 2026 16:47:27 -0700 Subject: [PATCH 4/5] test print --- tests/testthat/test-emuFit.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R index edfa5d4..f77643e 100644 --- a/tests/testthat/test-emuFit.R +++ b/tests/testthat/test-emuFit.R @@ -39,6 +39,7 @@ test_that("emuFit takes formulas and actually fits a model", { test_kj = data.frame(k = 2, j = 1:J)) }) + expect_output(print(fitted_model)) expect_true(all(fitted_model$coef$pval != fitted_model$coef$wald_p)) expect_true(all(fitted_model$coef$wald_p>0 & fitted_model$coef$wald_p<1)) expect_true(all(fitted_model$coef$pval>0 & fitted_model$coef$pval<1)) From a75e1b33dd6ed7f855d5df1d042f4994a47ecc6d Mon Sep 17 00:00:00 2001 From: amy Date: Tue, 14 Apr 2026 14:36:13 -0700 Subject: [PATCH 5/5] index version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index edd40b0..a73bfb4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: radEmu Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance -Version: 2.3.2.0 +Version: 2.3.2.1 Authors@R: c(person("David", "Clausen", role = c("aut")), person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")), person("Sarah", "Teichman", role = "aut"))