From 9bd5ee8b58e3229256f4bc7c20420328ab11718f Mon Sep 17 00:00:00 2001
From: amy <amy.willis7@gmail.com>
Date: Mon, 13 Apr 2026 15:53:28 -0700
Subject: [PATCH 1/5] reduce some tests; remove some files

---
 R/asymmetric_hat.R                      | 63 -------------------------
 R/eta_jacobian.R                        | 37 ---------------
 R/expected_info_repar.R                 | 52 --------------------
 R/update_data.R                         | 49 -------------------
 tests/testthat/test-fit_null_discrete.R |  8 ++--
 5 files changed, 4 insertions(+), 205 deletions(-)
 delete mode 100644 R/asymmetric_hat.R
 delete mode 100644 R/eta_jacobian.R
 delete mode 100644 R/expected_info_repar.R
 delete mode 100644 R/update_data.R

diff --git a/R/asymmetric_hat.R b/R/asymmetric_hat.R
deleted file mode 100644
index 8c908ff..0000000
--- a/R/asymmetric_hat.R
+++ /dev/null
@@ -1,63 +0,0 @@
-
-#this function creates the "asymmetric hat" matrix used for 
-#creating data augmentations as described in Kosmidis & Firth (2011)
-
-asymmetric_hat <- function(beta_tilde_J, #beta expressed as a vector
-                           tau, #column sums of Y (without data augmentations)
-                           X_tilde_J, #expanded design matrix (for Y as a single vector)
-                           J, #number of taxa
-                           n, #number of samples
-                           p, #number of predictors, incl. intercept
-                           collect_jacobians = TRUE,
-                           verbose= FALSE){
-
-  #get the expected information matrix 
-  expected_info_list <- expected_info_repar(beta_tilde_J,
-                                       tau,
-                                       X_tilde_J,
-                                       J,
-                                       n,
-                                       p,
-                                       verbose = verbose)
-  expected_info <- expected_info_list$expected_info
-  # expected_info_eigen <- eigen(expected_info)
-
-  #eigendecompose expected information after removing J-th taxon's parameters 
-  #(implicitly imposing B^J = 0)
-  expected_info_eigen <- eigen(expected_info[1:(p*(J - 1)),1:(p*(J - 1))])
-
-  # get info^(-0.5) using eigendecomposition
-  expected_info_half_inv <-  expected_info_eigen$vectors %*%
-    diag(sapply( expected_info_eigen$values,function(x) ifelse(x>0,1/sqrt(x),0))) %*%
-    t(expected_info_eigen$vectors)
-
-  # now we add on information in tau
-  tau_start <- p*(J - 1) + 1 #index where tau starts 
-  tau_end <- nrow(expected_info) #index where tau ends
-  #expected info is then block diagonal with one block for beta and one for tau:
-  expected_info_half_inv <- Matrix::bdiag(expected_info_half_inv,
-                                          Matrix::Diagonal(
-                                            x = 1/sqrt(diag(expected_info)[tau_start:tau_end])))
-
-  #compute augmentations
-  augmentations <- matrix(0,nrow = n, ncol = J)
-
-  for(i in 1:n){
-    if(verbose){
-      message("Computing data augmentation for observation ",i,".")
-    }
-    which_indices <- 1:J + (i - 1)*J #hmm maybe this isn't used...
-    jacobian_i <- expected_info_list$jacobians[[i]] #jacobian (d[mean]/d(beta,tau), I believe)
-    mu_i <- expected_info_list$mus[[i]] # means for i-th observation
-    postmult_jacobian_i <- jacobian_i%*%  expected_info_half_inv #working toward asymmetric hat
-    
-    #entries of asymmetric hat matrix for observation i:
-    H_i <- Matrix::tcrossprod(postmult_jacobian_i)%*%diag(as.numeric(mu_i))
-    #diagonals are data augmentations:
-    augmentations[i,] <- diag(as.matrix(H_i))[1:J]/2
-  }
-
-  return(augmentations)
-
-
-}
diff --git a/R/eta_jacobian.R b/R/eta_jacobian.R
deleted file mode 100644
index 4363f29..0000000
--- a/R/eta_jacobian.R
+++ /dev/null
@@ -1,37 +0,0 @@
-
-#function to get jacobian of mean function for observation i wrt beta, tau
-#used in getting data augmentations
-eta_jacobian <- function(beta_tilde_J, #long format beta (w.o. J-th taxon, I believe)
-                         tau, #colsums of Y (no augmentations)
-                         X_tilde_J, #expanded design matrix (for Y_vec)
-                         J, #number taxa
-                         i, #observation index
-                         n #number of observations
-                         ){
-
-  which_indices <- 1:J + (i - 1)*J
-  p_i <- exp(X_tilde_J[which_indices,]%*%beta_tilde_J)
-  p_i <- p_i/sum(p_i)
-  premult_mat <- Matrix::Matrix(1,nrow = J, ncol = 1)%*%Matrix::t(p_i)
-  premult_mat <- Matrix::Diagonal(x = rep(1,J)) - premult_mat
-  d_eta_i_d_beta_tilde <- premult_mat%*%X_tilde_J[which_indices,]
-  # d_eta_i_d_beta_tilde <- as(d_eta_i_d_beta_tilde,"sparseMatrix")
-  # e_i <- Matrix(0,nrow = 1, ncol = n)
-  # e_i[1,i] <- 1
-  # d_eta_i_d_tau <- Matrix::Matrix(0,nrow = J, ncol = n)
-  # d_eta_i_d_tau[,i] <- 1/tau[i]
-
-  d_eta_i_d_tau <- Matrix::sparseMatrix(i = 1:J,
-                                        j = rep(i, J),
-                                        x = rep(1/tau[i],J),
-                                        dims = c(J, n))
-
-  jacobian <- cbind(d_eta_i_d_beta_tilde, d_eta_i_d_tau)
-  # jacobian <- list()
-  # jacobian <- list("dbeta" = d_eta_i_d_beta_tilde,
-  #                  "dtau" = d_eta_i_d_tau)
-
-
-
-  return(jacobian)
-}
diff --git a/R/expected_info_repar.R b/R/expected_info_repar.R
deleted file mode 100644
index 90392da..0000000
--- a/R/expected_info_repar.R
+++ /dev/null
@@ -1,52 +0,0 @@
-
-#another function to get expected information (and possibly jacobians of 
-#mean function in parameters)
-expected_info_repar <- function(beta_tilde_J,
-                                tau,
-                                X_tilde_J,
-                                J,
-                                n,
-                                p,
-                                collect_byproducts = TRUE,
-                                verbose = FALSE){
-
-  expected_info <- Matrix::Matrix(0,nrow = n + p*(J - 1), ncol = n + p*(J - 1))
-
-  if(collect_byproducts){
-    jacobians <- vector(n, mode = "list")
-    mus <- vector(n,mode = "list")
-  } else{
-    jacobians <- NULL
-    mus <- NULL
-  }
-  # info_bits <- vector(n,mode = "list")
-
-  # pv5 <- profvis({
-  for(i in 1:n){
-    if(verbose){
-      message("Computing information contribution of observation ", i,".")
-    }
-
-    which_indices <- 1:J + (i - 1)*J
-    d_eta_i_d_theta_J <- eta_jacobian(beta_tilde_J,
-                                      tau,
-                                      X_tilde_J,
-                                      J,
-                                      i,
-                                      n)
-    if(collect_byproducts){
-      jacobians[[i]] <- d_eta_i_d_theta_J
-    }
-    mu_i <- exp(X_tilde_J[which_indices,]%*%beta_tilde_J + log(tau[i]) -
-                  log(sum(exp(X_tilde_J[which_indices,]%*%beta_tilde_J ))))
-    mus[[i]] <- mu_i
-    expected_info <- expected_info +
-      Matrix::crossprod(d_eta_i_d_theta_J,Matrix::crossprod(Matrix::Diagonal(x = as.numeric(mu_i)),d_eta_i_d_theta_J))
-    # Matrix::t(d_eta_i_d_theta_J)%*%diag(as.numeric(mu_i)) %*%
-    # d_eta_i_d_theta_J
-  }
-  # })
-  return(list("expected_info" = expected_info,
-              "jacobians" = jacobians,
-              "mus" = mus))
-}
diff --git a/R/update_data.R b/R/update_data.R
deleted file mode 100644
index 5e9c1d6..0000000
--- a/R/update_data.R
+++ /dev/null
@@ -1,49 +0,0 @@
-
-
-update_data <- function(Y,
-                        X_tilde,
-                        B,
-                        p,
-                        n,
-                        J,
-                        verbose = FALSE){
-  X_tilde_J <- X_tilde
-  indexes_to_zero <- numeric(0)
-  for(i in 1:n){
-    indexes_to_zero <- c(indexes_to_zero,(i - 1)*J + J)
-  }
-  X_tilde_J[indexes_to_zero,] <- 0
-  # for(i in 1:n){
-  #   X_tilde_J[(i - 1)*J + J,] <- 0#*X_tilde_J[(i - 1)*J + J,]
-  # }
-  X_tilde_J <- X_tilde_J[,1:(p*(J - 1))]
-  B_J <- B
-  for(k in 1:p){
-    B_J[k,] <- B_J[k,] - B_J[k,J]
-  }
-  beta_tilde_J <- B_cup_from_B(B_J[,1:(J - 1),drop= FALSE])
-
-  tau <- rowSums(Y)
-  if(sum(tau ==0)>0){
-  tau[tau == 0] <- 0.1
-  warning("At least one observation has total count equal to zero!")
-  }
-    # sapply(1:n,
-    #        function(i)
-    #          log(sum(Y[i,])) -
-    #          log(sum(exp(X_tilde_J[(i - 1)*J + 1:J,]%*%beta_tilde_J))))
-
-
-
-  augmentations <-
-    asymmetric_hat(beta_tilde_J = beta_tilde_J,
-                   tau = tau,
-                   X_tilde_J = X_tilde_J,
-                   J = J,
-                   n = n,
-                   p = p,
-                   verbose = verbose)
-
-  return(Y + augmentations)
-}
-
diff --git a/tests/testthat/test-fit_null_discrete.R b/tests/testthat/test-fit_null_discrete.R
index 61b32d9..717bf57 100644
--- a/tests/testthat/test-fit_null_discrete.R
+++ b/tests/testthat/test-fit_null_discrete.R
@@ -2,8 +2,8 @@
 test_that("new discrete is correct", {
   
   # set.seed(1)
-  n <- 40
-  J <- 30
+  n <- 20
+  J <- 15
   p <- 5
   beta <- matrix(rnorm(p*J, mean = 2, sd=1), ncol = J)
   X <- cbind(1, 
@@ -116,7 +116,7 @@ test_that("new discrete is correct with flexible jref and jstar", {
 
 test_that("new discrete is fast", {
   
-  #skip("Too slow for automated testing; 20 mins on laptop")
+  skip("Too slow; just checks fit_null_discrete faster than fit_null.")
   
   set.seed(2)
   n <- 60
@@ -174,7 +174,7 @@ test_that("new discrete is fast", {
 
 test_that("new discrete null aligns with older code", {
   
-  skip("Too slow for automated testing; ~3 mins on laptop")
+  skip("Too slow. ~3 mins on laptop. Just checks alignment between versions.")
   
   set.seed(4)
   n <- 12

From 89939228c147dd6b49ba874b7fc3d03b16729d8c Mon Sep 17 00:00:00 2001
From: amy <amy.willis7@gmail.com>
Date: Mon, 13 Apr 2026 16:25:56 -0700
Subject: [PATCH 2/5] remove some redundancies

---
 DESCRIPTION                             |   2 +-
 tests/testthat/test-emuFit.R            | 153 ++++++------------------
 tests/testthat/test-fit_null_discrete.R |   2 +-
 3 files changed, 37 insertions(+), 120 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 71fe3e2..787d3f2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: radEmu
 Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance 
-Version: 2.3.1.0
+Version: 2.3.1.1
 Authors@R: c(person("David", "Clausen", role = c("aut")),
              person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
              person("Sarah", "Teichman", role = "aut"))
diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R
index ffb2426..edfa5d4 100644
--- a/tests/testthat/test-emuFit.R
+++ b/tests/testthat/test-emuFit.R
@@ -1,5 +1,5 @@
 set.seed(11)
-J <- 6
+J <- 4
 n <- 12
 X <- cbind(1,rnorm(n))
 b0 <- rnorm(J)
@@ -33,16 +33,13 @@ test_that("emuFit takes formulas and actually fits a model", {
                            verbose = FALSE,
                            B_null_tol = 1e-2,
                            tolerance = 0.01,
-                           return_wald_p = FALSE,
+                           return_wald_p = TRUE,
                            compute_cis = TRUE,
                            run_score_tests = TRUE, 
-                           test_kj = data.frame(k = 2, j = 1:6))
+                           test_kj = data.frame(k = 2, j = 1:J))
   })
   
-  
-  
-  ## emuFit takes formulas and actually fits a model (with score tests)
-  
+  expect_true(all(fitted_model$coef$pval != fitted_model$coef$wald_p))
   expect_true(all(fitted_model$coef$wald_p>0 & fitted_model$coef$wald_p<1))
   expect_true(all(fitted_model$coef$pval>0 & fitted_model$coef$pval<1))
   expect_true(inherits(fitted_model$B,"matrix"))
@@ -60,8 +57,7 @@ test_that("emuFit takes formulas and actually fits a model", {
   
   expect_identical(fitted_model$coef$estimate, second_model$coef$estimate)
   
-  ##  emuFit takes formulas and actually fits a model (with score tests using full model info)
-  
+  ##  changing whether we use the full model changes the p-values
   fitted_model_use_fullmodel_info <- emuFit(Y = Y,
                                             X = X,
                                             formula = ~group,
@@ -69,68 +65,19 @@ test_that("emuFit takes formulas and actually fits a model", {
                                             tolerance = 0.01,
                                             data = covariates,
                                             run_score_tests= TRUE,
-                                            return_wald_p = TRUE, ### diff
+                                            return_wald_p = TRUE, 
                                             control = list(use_fullmodel_info = TRUE), ### diff
-                                            verbose = FALSE,
-                                            test_kj = data.frame(k = 2, j = 1:6))
-  
-  
-  expect_true(all(fitted_model_use_fullmodel_info$coef$wald_p>0 & fitted_model_use_fullmodel_info$coef$wald_p<1))
-  expect_true(inherits(fitted_model_use_fullmodel_info$B,"matrix"))
-  expect_true(inherits(fitted_model_use_fullmodel_info$Y_augmented,"matrix"))
-  expect_true(all(fitted_model_use_fullmodel_info$coef$pval>0 & fitted_model_use_fullmodel_info$coef$pval<1))
-  
-  ## if return_both_score_pvals = TRUE, emuFit runs and returns two non-identical p-values
-  
-  ### TODO test this in a smaller example
-  
-  fitted_model_both <-  emuFit(Y = Y,
-                               X = X,
-                               formula = ~group,
-                               data = covariates,
-                               run_score_tests= TRUE,
-                               return_wald_p = TRUE,
-                               verbose = FALSE,
-                               test_kj = data.frame(k = 2, j = 1:6),
-                               control = list(tau = 1.2, 
-                                              use_fullmodel_info = TRUE,
-                                              return_both_score_pvals = TRUE))
-  
-  ps_full <- fitted_model_both$coef$score_pval_full_info
-  ps_null <- fitted_model_both$coef$score_pval_null_info
-  expect_true(is.numeric(ps_full))
-  expect_true(is.numeric(ps_null))
-  expect_true(cor(ps_null, ps_full) > 0.95)
-  expect_true(cor(ps_null, ps_full) < 1)
-   
-  # ## if return_both_score_pvals = TRUE, we
-  # ## get same p-values as if we separately run emuFit with use_fullmodel_info = TRUE and = FALSE"
-  # 
-  # # fitted_model had use_fullmodel_info = FALSE
-  # 
-  # fitted_full <- emuFit(Y = Y,
-  #                       X = X,
-  #                       formula = ~group,
-  #                       tau = 1.2,
-  #                       data = covariates,
-  #                       run_score_tests= TRUE,
-  #                       return_wald_p = TRUE,
-  #                       use_fullmodel_info = TRUE,
-  #                       verbose = FALSE,
-  #                       return_both_score_pvals = FALSE)
-  # 
-  # expect_true(max(abs(fitted_model_both$coef$score_pval_full_info - fitted_full$coef$pval))==0)
-  # expect_true(max(abs(fitted_model_both$coef$score_pval_null_info - fitted_model$coef$pval))==0)
-  # 
-  # ## Difference between using full and null model info is not extreme
-  # expect_true(cor(fitted_full$coef$pval, fitted_model$coef$pval) < 1)
-  # expect_true(cor(fitted_full$coef$pval, fitted_model$coef$pval) > 0.95)
+                                            verbose = FALSE, 
+                                            test_kj = data.frame(k = 2, j = 1:J))
+  
+  expect_true(all(fitted_model_use_fullmodel_info$coef$pval != fitted_model$coef$pval))
+  expect_true(cor(fitted_model_use_fullmodel_info$coef$pval, fitted_model$coef$pval) > 0.95)
   
 })
 
-test_that("emuFit takes cluster argument without breaking ",{
-          expect_silent({
-            fitted_model_cluster <- emuFit(Y = Y,
+test_that("cluster argument returns same estimates but different p-values",{
+  expect_silent({
+    fitted_model_cluster <- emuFit(Y = Y,
                                    X = X,
                                    formula = ~group,
                                    data = covariates,
@@ -141,39 +88,25 @@ test_that("emuFit takes cluster argument without breaking ",{
                                    compute_cis = TRUE,
                                    run_score_tests = TRUE, 
                                    cluster = rep(1:3,each = 4),
-                                   test_kj = data.frame(k = 2, j = 1:6))
-          })
+                                   test_kj = data.frame(k = 2, j = 1:2))
+  })
   
   expect_silent({
     fitted_model_nocluster <- emuFit(Y = Y,
-                                   X = X,
-                                   formula = ~group,
-                                   data = covariates,
-                                   verbose = FALSE,
-                                   B_null_tol = 1e-2,
-                                   tolerance = 0.01,
-                                   return_wald_p = FALSE,
-                                   compute_cis = TRUE,
-                                   run_score_tests = TRUE, 
-                                   test_kj = data.frame(k = 2, j = 1:6))
+                                     X = X,
+                                     formula = ~group,
+                                     data = covariates,
+                                     verbose = FALSE,
+                                     B_null_tol = 1e-2,
+                                     tolerance = 0.01,
+                                     return_wald_p = FALSE,
+                                     compute_cis = TRUE,
+                                     run_score_tests = TRUE, 
+                                     test_kj = data.frame(k = 2, j = 1:2))
   })
   
   expect_true(all(fitted_model_nocluster$coef$estimate == fitted_model_cluster$coef$estimate))
-  
   expect_true(all(fitted_model_nocluster$coef$se != fitted_model_cluster$coef$se))
-  
-  expect_true(all(fitted_model_cluster$coef$se != fitted_model_nocluster$coef$se))
-  
-          
-          
-          
-          ## emuFit takes formulas and actually fits a model (with score tests)
-          
-          expect_true(all(fitted_model_cluster$coef$wald_p>0 & fitted_model_cluster$coef$wald_p<1))
-          expect_true(all(fitted_model_cluster$coef$pval>0 & fitted_model_cluster$coef$pval<1))
-          expect_true(inherits(fitted_model_cluster$B,"matrix"))
-          expect_true(inherits(fitted_model_cluster$Y_augmented,"matrix"))
-          expect_true(cor(fitted_model_cluster$B[2,],b[2, ]) > 0.85)
           
 })
 
@@ -193,14 +126,14 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
     
     X <- cbind(1,rnorm(12))
     covariates <- data.frame(group = X[,2])
-    
-    b1 <- seq(1,5,length.out = 6)
+    J <- 6
+    b1 <- seq(1,5,length.out = J)
     b1 <- b1 - mean(b1)
     b1[3:4] <- 0
     
-    Y <- radEmu:::simulate_data(n = 12, J = 6,
+    Y <- radEmu:::simulate_data(n = 12, J = J,
                                 X = X,
-                                b0 = rnorm(6),
+                                b0 = rnorm(J),
                                 b1 = b1,
                                 distn = "ZINB",
                                 zinb_size = 2,
@@ -333,8 +266,6 @@ test_that("GEE with cluster covariance gives plausible type 1 error ",{
 #   
 #   
 #   expect_true(all(fitted_model$coef$wald_p >= 0 & fitted_model$coef$wald_p <= 1))
-#   expect_true(inherits(fitted_model$B,"matrix"))
-#   expect_true(inherits(fitted_model$Y_augmented,"matrix"))
 #   expect_true(cor(fitted_model$B[2,],b1)>0.95)
 #   
 # })
@@ -353,7 +284,7 @@ test_that("emuFit runs without penalty", {
                            return_wald_p = FALSE,
                            compute_cis = TRUE,
                            run_score_tests = TRUE, 
-                           test_kj = data.frame(k = 2, j = 1:6))
+                           test_kj = data.frame(k = 2, j = 1:2))
   })
 })
 
@@ -369,23 +300,9 @@ test_that("emuFit runs with just intercept model", {
                            return_wald_p = FALSE,
                            compute_cis = TRUE,
                            run_score_tests = TRUE, 
-                           test_kj = data.frame(k = 1, j = 1:6))
-  })
-  
-  expect_message({
-    fitted_model1 <- emuFit(Y = Y,
-                           X = X[, 1, drop = FALSE],
-                           verbose = FALSE,
-                           B_null_tol = 1e-2,
-                           tolerance = 0.01,
-                           return_wald_p = FALSE,
-                           compute_cis = TRUE,
-                           run_score_tests = TRUE, 
-                           test_kj = data.frame(k = 1, j = 1:6))
+                           test_kj = data.frame(k = 1, j = 1:2))
   })
   
-  expect_equal(fitted_model$coef[, 2:9], fitted_model1$coef[, 2:9])
-  
 })
 
 test_that("emuFit has 'score_test_hyperparams' object and throws warnings when convergence isn't hit", {
@@ -631,7 +548,7 @@ test_that("emuFit refits starting at provided value if `B` or `fitted_model` are
 })
 
 test_that("giving test_kj as valid strings works", {
-  colnames(Y) <- paste0("taxon", 1:6)
+  colnames(Y) <- paste0("taxon", 1:J)
   colnames(X) <- c("int", "group")
   res <- emuFit(Y = Y, X = X, compute_cis = FALSE, test_kj = data.frame(k = "group", j = "taxon3"),
                 penalize = FALSE, tolerance = 0.1)
@@ -685,9 +602,9 @@ test_that("multiple constraint functions can be submitted", {
   
   res4 <- emuFit(Y = Y, X = X, compute_cis = FALSE, test_kj = data.frame(k = 2, j = 1),
                  penalize = TRUE, tolerance = 0.1, 
-                 constraint_fn = list(3, 5), 
+                 constraint_fn = list(3, 4), 
                  constraint_grad_fn = list(NULL, NULL),
                  control = list(trackB = TRUE))
   expect_true(all.equal(res4$B[1, 3], 0))
-  expect_true(all.equal(res4$B[2, 5], 0))
+  expect_true(all.equal(res4$B[2, 4], 0))
 })
diff --git a/tests/testthat/test-fit_null_discrete.R b/tests/testthat/test-fit_null_discrete.R
index 717bf57..de9ce5a 100644
--- a/tests/testthat/test-fit_null_discrete.R
+++ b/tests/testthat/test-fit_null_discrete.R
@@ -2,7 +2,7 @@
 test_that("new discrete is correct", {
   
   # set.seed(1)
-  n <- 20
+  n <- 16
   J <- 15
   p <- 5
   beta <- matrix(rnorm(p*J, mean = 2, sd=1), ncol = J)

From b366c444f3eb394ab4821a101a1bbfe07514d96a Mon Sep 17 00:00:00 2001
From: amy <amy.willis7@gmail.com>
Date: Mon, 13 Apr 2026 16:39:48 -0700
Subject: [PATCH 3/5] reduce micro

---
 DESCRIPTION                        |   2 +-
 tests/testthat/test-emuFit_micro.R | 288 +++++++++++------------------
 2 files changed, 113 insertions(+), 177 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 787d3f2..edd40b0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: radEmu
 Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance 
-Version: 2.3.1.1
+Version: 2.3.2.0
 Authors@R: c(person("David", "Clausen", role = c("aut")),
              person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
              person("Sarah", "Teichman", role = "aut"))
diff --git a/tests/testthat/test-emuFit_micro.R b/tests/testthat/test-emuFit_micro.R
index 90c1163..075605c 100644
--- a/tests/testthat/test-emuFit_micro.R
+++ b/tests/testthat/test-emuFit_micro.R
@@ -1,50 +1,28 @@
 
-test_that("ML fit to simple example give reasonable output", {
+test_that("With or without 'working_constraint' we get same results", {
   set.seed(4323)
-  X <- cbind(1,rep(c(0,1),each = 20))
-  J <- 10
-  n <- 40
-  b1 <- 1:J - mean(1:J)
+  n <- 20
+  X <- cbind(1,rep(c(0,1),each = n/2))
+  J <- 6
+  # b1 <- 1:J - mean(1:J)
+  b1 <- seq(1,J,length.out = J)
+  
   Y <- radEmu:::simulate_data(n = n,
                               J = J,
                               X = X,
                               b0 = rnorm(J),
                               b1 = b1,
                               distn = "Poisson",
-                              mean_z = 10)
+                              mean_z = 5)
   
   ml_fit <- emuFit_micro(X = X,
                          Y = Y,
                          constraint_fn = rep(list(function(x) mean(x)), 2), 
                          maxit = 200,
-                         tolerance = 1e-3,
+                         tolerance = 1e-6,
                          verbose = FALSE)
-
-  # plot(b1-mean(b1),ml_fit[2,])
-  # abline(a = 0,b = 1,lty =2,col = "red")
-
-  expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1)))) < 1) # increased to 1
-})
-
-test_that("With or without 'working_constraint' we get same results", {
-  set.seed(4323)
-  X <- cbind(1,rep(c(0,1),each = 20))
-  J <- 10
-  n <- 40
-  Y <- radEmu:::simulate_data(n = n,
-                              J = J,
-                              X = X,
-                              b0 = rnorm(J),
-                              b1 = seq(1,10,length.out = J),
-                              distn = "Poisson",
-                              mean_z = 5)
   
-  ml_fit <- emuFit_micro(X,
-                         Y,
-                         constraint_fn = rep(list(function(x) mean(x)), 2),
-                         maxit = 200,
-                         tolerance = 1e-6,
-                         verbose= FALSE)
+  expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1)))) < 1) # increased to 1
   
   ml_fit_direct <- emuFit_micro(X,
                                 Y,
@@ -53,61 +31,60 @@ test_that("With or without 'working_constraint' we get same results", {
                                 use_working_constraint = FALSE,
                                 tolerance = 1e-6,
                                 verbose = FALSE)
-
-
+  
+  
   expect_equal(ml_fit,ml_fit_direct,tolerance = 1e-4)
 })
 
 
 
-test_that("PL fit with categorical predictor matches analytical form of MPLE in this case,
-          and does NOT match MLE when group sizes are equal", {
-            set.seed(90333)
-            X <- cbind(1,rep(c(0,1),each = 20))
-            z <- rnorm(40) 
-            J <- 10
-            p <- 2
-            n <- 40
-            b0 <- rnorm(J)
-            b1 <- seq(1,10,length.out = J)/5
-            b <- rbind(b0,b1)
-            Y <- matrix(NA,ncol = J, nrow = 40)
-            
-            for(i in 1:40){
-              for(j in 1:J){
-                temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
-                Y[i,j] <- rpois(1, lambda = temp_mean)
-              }
-            }
-            pl_fit <- emuFit_micro_penalized(X,
-                                             Y,
-                                             B = matrix(rnorm(20),nrow = 2),
-                                             constraint_fn = rep(list(function(x) mean(x)), 2),
-                                             maxit = 200,
-                                             tolerance = 1e-8,
-                                             verbose= FALSE)
-            
-            ml_fit <- emuFit_micro(X,
+test_that("PL fit with categorical predictor matches analytical form of MPLE in this case, and does NOT match MLE when group sizes are equal", {
+  set.seed(90333)
+  X <- cbind(1,rep(c(0,1),each = 20))
+  z <- rnorm(40) 
+  J <- 10
+  p <- 2
+  n <- 40
+  b0 <- rnorm(J)
+  b1 <- seq(1,10,length.out = J)/5
+  b <- rbind(b0,b1)
+  Y <- matrix(NA,ncol = J, nrow = 40)
+  
+  for(i in 1:40){
+    for(j in 1:J){
+      temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
+      Y[i,j] <- rpois(1, lambda = temp_mean)
+    }
+  }
+  pl_fit <- emuFit_micro_penalized(X,
                                    Y,
                                    B = matrix(rnorm(20),nrow = 2),
                                    constraint_fn = rep(list(function(x) mean(x)), 2),
                                    maxit = 200,
                                    tolerance = 1e-8,
                                    verbose= FALSE)
-            
-            cs_grp1 <- colSums(Y[1:20,])
-            cs_grp2 <- colSums(Y[21:40,])
-            
-            bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5)
-            bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5)
-            bhat2 <- bhat2 - bhat1
-            bhat1 <- bhat1 - mean(bhat1)
-            bhat2 <- bhat2 - mean(bhat2)
-            analytical_B <- rbind(bhat1,bhat2)
-            
-            expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7)
-            expect_true(max(abs(ml_fit - analytical_B))>0.01)
-          })
+  
+  ml_fit <- emuFit_micro(X,
+                         Y,
+                         B = matrix(rnorm(20),nrow = 2),
+                         constraint_fn = rep(list(function(x) mean(x)), 2),
+                         maxit = 200,
+                         tolerance = 1e-8,
+                         verbose= FALSE)
+  
+  cs_grp1 <- colSums(Y[1:20,])
+  cs_grp2 <- colSums(Y[21:40,])
+  
+  bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5)
+  bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5)
+  bhat2 <- bhat2 - bhat1
+  bhat1 <- bhat1 - mean(bhat1)
+  bhat2 <- bhat2 - mean(bhat2)
+  analytical_B <- rbind(bhat1,bhat2)
+  
+  expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7)
+  expect_true(max(abs(ml_fit - analytical_B))>0.01)
+})
 
 
 test_that("PL fit with categorical predictor matches analytical form of MPLE in this case,
@@ -171,7 +148,7 @@ test_that("We get same results with and without warm start", {
                               b1 = 1:J,
                               distn = "Poisson",
                               mean_z = 2)
-
+  
   # may need to have large number of iterations and small tolerance
   ml_fit <- emuFit_micro(X,
                          Y,
@@ -187,7 +164,7 @@ test_that("We get same results with and without warm start", {
                                 warm_start = FALSE,
                                 tolerance = 1e-14,
                                 verbose = FALSE)
-
+  
   expect_equal(ml_fit, ml_fit_direct, tolerance = 1e-6)
 })
 
@@ -211,8 +188,8 @@ test_that("We get a fit if we don't specify constraint", {
                          maxit = 200,
                          tolerance = 1e-6,
                          verbose = FALSE)
-
-
+  
+  
   expect_true(is.matrix(ml_fit))
 })
 
@@ -220,7 +197,7 @@ test_that("ML fit to simple example give reasonable output with J >> n", {
   set.seed(4323)
   n <- 10
   X <- cbind(1,rep(c(0,1),each = n/2))
-  J <- 1000
+  J <- 200
   b0 <- rnorm(J)
   b1 <- seq(-5,5,length.out = J)
   b <- rbind(b0,b1)
@@ -232,7 +209,7 @@ test_that("ML fit to simple example give reasonable output with J >> n", {
                               b1 = b1,
                               distn = "Poisson",
                               mean_z = 10)
-
+  
   ml_fit <- emuFit_micro(X,
                          Y,
                          constraint_fn = rep(list(function(x) mean(x)), 2),
@@ -280,101 +257,60 @@ test_that("unpenalized fit uses B if given, and therefore fit is quicker", {
   
 })
 
-test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE", {
-  set.seed(4323)
-  J <- 100
-  n <- 100
-  X <- cbind(1,rnorm(n))
-  Y <- radEmu:::simulate_data(n = n,
-                              J = J,
-                              X = X,
-                              b0 = rnorm(J),
-                              b1 = seq(1,5,length.out = J),
-                              distn = "Poisson",
-                              # zinb_size = 2,
-                              # zinb_zero_prop = 0.7,
-                              mean_z = 10)
-  
-  start <- proc.time()
-  pl_fit_one <- emuFit_micro(X,
-                             Y,
-                             B = NULL,
-                             # constraint_fn = function(x) mean(x),
-                             maxit = 10000,
-                             tolerance = 1e-6,
-                             verbose= FALSE,
-                             optimize_rows = FALSE)
-  end <- proc.time() - start 
-  start_refit <- proc.time() 
-  pl_fit_two <- emuFit_micro(X,
-                             Y,
-                             B = NULL,
-                             # constraint_fn = function(x) mean(x),
-                             maxit = 10000,
-                             tolerance =1e-6,
-                             verbose= FALSE,
-                             optimize_rows = TRUE)
-  end_refit <- proc.time() - start_refit 
-  # confirm that new approach is faster
-  expect_true(end_refit[3] < end[3])
-  # confirm that two estimates are very similar
-  expect_true(max(pl_fit_one - pl_fit_two) < 1e-5)
-  
-})
 
-test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE with large p", {
-  
-  skip(message = "Skipping because test is very slow with J = 100 and p = 8")
-  
-  set.seed(4323)
-  J <- 100
-  n <- 100
-  X <- cbind(1,rnorm(n),rnorm(n), rep(c(0, 1, 0, 0), each = 25), 
-             rep(c(0, 0, 1, 0), each = 25), rep(c(0, 0, 0, 1), each = 25), rnorm(n),
-             rep(0:1, 50))
-  B <- rbind(rnorm(J), seq(1, 5, length.out = J),
-            rnorm(J), rnorm(J), rnorm(J), rnorm(J),
-            rnorm(J), rnorm(J))
-  for (k in 1:ncol(X)) {
-    B[k, ] <- B[k, ] - radEmu:::pseudohuber_median(B[k, ], 0.1)
-  }
-  Y <- radEmu:::simulate_data(n = n,
-                              J = J,
-                              X = X,
-                              B = B,
-                              distn = "Poisson",
-                              mean_z = 10)
-  
-  start <- proc.time()
-  pl_fit_one <- emuFit_micro(X,
-                             Y,
-                             B = NULL,
-                             maxit = 10000,
-                             tolerance = 1e-6,
-                             verbose= FALSE,
-                             optimize_rows = FALSE)
-  end <- proc.time() - start 
-  start_refit <- proc.time() 
-  pl_fit_two <- emuFit_micro(X,
-                             Y,
-                             B = NULL,
-                             maxit = 10000,
-                             tolerance =1e-6,
-                             verbose= FALSE,
-                             optimize_rows = TRUE)
-  end_refit <- proc.time() - start_refit 
-  expect_true(end_refit[3] < end[3])
-  max_est_error1 <- max(pl_fit_one - B)
-  max_est_error2 <- max(pl_fit_two - B)
-  max_est_diff <- max(pl_fit_one - pl_fit_two)
-  expect_true(max_est_error1 > max_est_error2)
-  mean_est_error1 <- mean(sqrt((pl_fit_one - B)^2))
-  mean_est_error2 <- mean(sqrt((pl_fit_two - B)^2))
-  mean_est_diff <- mean(sqrt((pl_fit_one - pl_fit_two)^2))
-  expect_true(mean_est_error1 > mean_est_error2)
-  expect_true(mean_est_diff < 1e-3)
-    
-})
+# test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE with large p", {
+#   
+#   skip(message = "Skipping because test is very slow with J = 100 and p = 8")
+#   
+#   set.seed(4323)
+#   J <- 100
+#   n <- 100
+#   X <- cbind(1,rnorm(n),rnorm(n), rep(c(0, 1, 0, 0), each = 25), 
+#              rep(c(0, 0, 1, 0), each = 25), rep(c(0, 0, 0, 1), each = 25), rnorm(n),
+#              rep(0:1, 50))
+#   B <- rbind(rnorm(J), seq(1, 5, length.out = J),
+#              rnorm(J), rnorm(J), rnorm(J), rnorm(J),
+#              rnorm(J), rnorm(J))
+#   for (k in 1:ncol(X)) {
+#     B[k, ] <- B[k, ] - radEmu:::pseudohuber_median(B[k, ], 0.1)
+#   }
+#   Y <- radEmu:::simulate_data(n = n,
+#                               J = J,
+#                               X = X,
+#                               B = B,
+#                               distn = "Poisson",
+#                               mean_z = 10)
+#   
+#   start <- proc.time()
+#   pl_fit_one <- emuFit_micro(X,
+#                              Y,
+#                              B = NULL,
+#                              maxit = 10000,
+#                              tolerance = 1e-6,
+#                              verbose= FALSE,
+#                              optimize_rows = FALSE)
+#   end <- proc.time() - start 
+#   start_refit <- proc.time() 
+#   pl_fit_two <- emuFit_micro(X,
+#                              Y,
+#                              B = NULL,
+#                              maxit = 10000,
+#                              tolerance =1e-6,
+#                              verbose= FALSE,
+#                              optimize_rows = TRUE)
+#   end_refit <- proc.time() - start_refit 
+#   expect_true(end_refit[3] < end[3])
+#   max_est_error1 <- max(pl_fit_one - B)
+#   max_est_error2 <- max(pl_fit_two - B)
+#   max_est_diff <- max(pl_fit_one - pl_fit_two)
+#   expect_true(max_est_error1 > max_est_error2)
+#   mean_est_error1 <- mean(sqrt((pl_fit_one - B)^2))
+#   mean_est_error2 <- mean(sqrt((pl_fit_two - B)^2))
+#   mean_est_diff <- mean(sqrt((pl_fit_one - pl_fit_two)^2))
+#   expect_true(mean_est_error1 > mean_est_error2)
+#   expect_true(mean_est_diff < 1e-3)
+#   
+# })
 #
 #
 #

From 824aea44a35b94479b13b1bf719be5a9ad28ea44 Mon Sep 17 00:00:00 2001
From: amy <amy.willis7@gmail.com>
Date: Mon, 13 Apr 2026 16:47:27 -0700
Subject: [PATCH 4/5] test print

---
 tests/testthat/test-emuFit.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R
index edfa5d4..f77643e 100644
--- a/tests/testthat/test-emuFit.R
+++ b/tests/testthat/test-emuFit.R
@@ -39,6 +39,7 @@ test_that("emuFit takes formulas and actually fits a model", {
                            test_kj = data.frame(k = 2, j = 1:J))
   })
   
+  expect_output(print(fitted_model))
   expect_true(all(fitted_model$coef$pval != fitted_model$coef$wald_p))
   expect_true(all(fitted_model$coef$wald_p>0 & fitted_model$coef$wald_p<1))
   expect_true(all(fitted_model$coef$pval>0 & fitted_model$coef$pval<1))

From a75e1b33dd6ed7f855d5df1d042f4994a47ecc6d Mon Sep 17 00:00:00 2001
From: amy <amy.willis7@gmail.com>
Date: Tue, 14 Apr 2026 14:36:13 -0700
Subject: [PATCH 5/5] index version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index edd40b0..a73bfb4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: radEmu
 Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance 
-Version: 2.3.2.0
+Version: 2.3.2.1
 Authors@R: c(person("David", "Clausen", role = c("aut")),
              person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
              person("Sarah", "Teichman", role = "aut"))