Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Package: radEmu
Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance
Version: 2.3.1.0
Version: 2.3.2.0
Authors@R: c(person("David", "Clausen", role = c("aut")),
person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
person("Sarah", "Teichman", role = "aut"))
Description: A differential abundance method for the analysis of microbiome data. radEmu estimates fold-differences in the abundance of taxa across samples relative to "typical" fold-differences. Notably, it does not require pseudocounts, nor choosing a denominator taxon.
Description: A differential abundance method for the analysis of microbiome data. 'radEmu' estimates fold-differences in the abundance of taxa across samples relative to "typical" fold-differences. Notably, it does not require pseudocounts, nor choosing a denominator taxon. For more details, see Clausen et al. (2026) <doi:10.1093/biomet/asag009>.
URL: https://github.com/statdivlab/radEmu, https://statdivlab.github.io/radEmu/
License: MIT + file LICENSE
Encoding: UTF-8
Expand Down
15 changes: 2 additions & 13 deletions R/emuFit.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Fit radEmu model
#'
#' @param Y an n x J matrix or dataframe of nonnegative observations, or a phyloseq object containing an otu table and sample data.
#' @param Y an n x J matrix or dataframe of nonnegative observations, or a `phyloseq` or `TreeSummarizedExperiment` object containing an otu table and sample data.
#' @param X an n x p design matrix (either provide \code{X} or \code{data} and \code{formula})
#' @param formula a one-sided formula specifying the form of the mean model to be fit (used with \code{data})
#' @param data an n x p data frame containing variables given in \code{formula}
Expand Down Expand Up @@ -120,25 +120,14 @@
#' @import MASS
#'
#' @examples
#' # data frame example
#' # data frame example (for phyloseq and TreeSummarizedExperiment examples, see the vignettes)
#' data(wirbel_sample_small)
#' data(wirbel_otu_small)
#' emuRes <- emuFit(formula = ~ Group, data = wirbel_sample_small, Y = wirbel_otu_small,
#' test_kj = data.frame(k = 2, j = 1), tolerance = 0.01)
#' # here we set large tolerances for the example to run quickly,
#' # but we recommend smaller tolerances in practice
#'
#' # TreeSummarizedExperiment example (only run this if you have TreeSummarizedExperiment installed)
#' \dontrun{
#' library("TreeSummarizedExperiment")
#' example("TreeSummarizedExperiment")
#' assayNames(tse) <- "counts"
#' emuRes <- emuFit(Y = tse, formula = ~ condition, assay_name = "counts",
#' test_kj = data.frame(k = 2, j = 1), tolerance = 0.01)
#' # here we set large tolerances for the example to run quickly,
#' # but we recommend smaller tolerances in practice
#' }
#'
#' @export
#'
emuFit <- function(Y,
Expand Down
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,20 @@ Sadly we do not yet have a ~~logo~~ nice-looking logo. If you would like to desi

## Installation

To download the radEmu package, use the code below.
To download the stable release of radEmu (version 2.3.1) from CRAN, use the code below.

``` r
install.packages("radEmu")
```

To download the current development version of the radEmu package, use the code below.

``` r
# install.packages("devtools")
devtools::install_github("statdivlab/radEmu")
library(radEmu)
```

We are currently only releasing `radEmu` via GitHub. If you'd like us to consider submitting to CRAN, please let us know by opening an issue.

## Use

The vignettes demonstrate example usage of the main functions. Please [file an issue](https://github.com/statdivlab/radEmu/issues) if you have a request for a tutorial that is not currently included. The following code shows the easy-to-use syntax if your data is in a `phyloseq` object, and you want to estimate parameters for all taxa and run a test for the parameter associated with "Group" and taxon 1:
Expand Down
15 changes: 2 additions & 13 deletions man/emuFit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion tests/testthat/test-X_cup_from_X.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
test_that("X_cup_from_X_fast works and is faster than X_cup_from_X", {

X <- cbind(1, rnorm(100), runif(100))
J <- 200
start1 <- proc.time()
Expand All @@ -9,5 +10,5 @@ test_that("X_cup_from_X_fast works and is faster than X_cup_from_X", {
end2 <- proc.time() - start2

expect_true(all.equal(Xcup1, Xcup2))
expect_true(end2[3] < end1[3])
#expect_true(end2[3] < end1[3])
})
123 changes: 6 additions & 117 deletions tests/testthat/test-emuFit_micro.R
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ test_that("ML fit to simple example give reasonable output with J >> n", {
})

test_that("unpenalized fit uses B if given, and therefore fit is quicker", {

skip("don't test timing automatically")

set.seed(4323)
X <- cbind(1,rnorm(10))
J <- 10
Expand Down Expand Up @@ -281,6 +284,9 @@ test_that("unpenalized fit uses B if given, and therefore fit is quicker", {
})

test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE", {

skip("don't test timing automatically")

set.seed(4323)
J <- 100
n <- 100
Expand Down Expand Up @@ -375,122 +381,5 @@ test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE wit
expect_true(mean_est_diff < 1e-3)

})
#
#
#

#
# test_that("ML fit to multiple regressors, large n, and excess-Poisson variance gives reasonable output", {
# set.seed(4323)
# n <- 800
# X <- cbind(1,rep(c(0,1),each = n/2),rnorm(n),rnorm(n),rnorm(n))
# z <- rnorm(n) +5
# J <- 10
# p <- 2
#
# b0 <- rnorm(J)
# b1 <- seq(1,10,length.out = J)
# b <- rbind(b0,b1)
# Y <- matrix(NA,ncol = J, nrow = n)
#
# for(i in 1:n){
# for(j in 1:J){
# temp_mean <- exp(X[i,1:2,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
# Y[i,j] <- rnbinom(1,mu= temp_mean,size = 0.25)#rpois(1, lambda = temp_mean)
# }
# }
# ml_fit <- emuFit_micro(X,
# Y,
# constraint_fn = function(x) mean(x),
# maxit = 200,
# tolerance = 1e-2)
#
# # plot(b1-mean(b1),ml_fit[2,])
# # abline(a = 0,b = 1,lty =2,col = "red")
#
# expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1))))<.5)
# })
#
#
# test_that("ML fit to multiple regressors, moderate n, moderate J, and excess-Poisson variance gives reasonable output", {
# set.seed(4323)
# n <- 40
# X <- cbind(1,rep(c(0,1),each = n/2),rnorm(n),rnorm(n),rnorm(n))
# z <- rnorm(n)
# J <- 100
# p <- 2
#
# b0 <- rnorm(J)
# b1 <- seq(1,10,length.out = J)
# b <- rbind(b0,b1)
# Y <- matrix(NA,ncol = J, nrow = n)
#
# for(i in 1:n){
# for(j in 1:J){
# temp_mean <- exp(X[i,1:2,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
# Y[i,j] <- rnbinom(1,mu= temp_mean,size = 0.25)#rpois(1, lambda = temp_mean)
# }
# }
# ml_fit <- emuFit_micro(X,
# Y,
# constraint_fn = function(x) mean(x),
# maxit = 200,
# tolerance = 1e-1,
# max_step = 0.5)
#
# # plot(b1-mean(b1),ml_fit[2,])
# # abline(a = 0,b = 1, lty = 2, col = "red")
#
# b1_hat <- ml_fit[2,]
# expect_true(abs(lm(b1_hat~b1)$coef[2] - 1) < 0.2)
# # abline(a = 0,b = 1,lty =2,col = "red")
#
# })
#
# # t(ml_fit) %>% as.data.frame() %>% mutate(j = 1:J) %>%
# # pivot_longer(-j) %>%
# # ggplot() +
# # geom_point(aes(x = j,y = value, color = name)) +
# # geom_line(aes(x = j, y = value, group = name, color = name)) +
# # theme_bw()
#
#
#
# test_that("ML fit to simple example give reasonable output with J > n", {
# set.seed(4323)
# X <- cbind(1,rep(c(0,1),each = 20))
# z <- rnorm(40) +8
# b0 <- rnorm(80)
# b1 <- seq(-5,5,length.out = 80)
# b <- rbind(b0,b1)
# Y <- matrix(NA,ncol = 80, nrow = 40)
#
# for(i in 1:40){
# for(j in 1:80){
# temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
# Y[i,j] <- rpois(1, lambda = temp_mean)
# }
# }
# ml_fit <- emuFit_micro(X,
# Y,
# B = NULL,
# constraint_fn = function(x) mean(x),
# maxit = 500,
# tolerance = 0.01)
#
#
#
#
#
# expect_true(max(abs(ml_fit[2,] - b1))<.1)
#
# # X_cup <- X_cup_from_X(X,10)
# # B_cup <- B_cup_from_B(ml_fit)
#
#
#
# })
#
#


3 changes: 3 additions & 0 deletions tests/testthat/test-emuFit_micro_penalized.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ less efficient implementation (and that both substantially differ from MLE", {
})

test_that("penalized fit uses B if given, and therefore fit is quicker", {

skip("don't test timing automatically")

set.seed(4323)
X <- cbind(1,rnorm(10))
J <- 10
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-fit_null_discrete.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ test_that("new discrete is fast", {
})

## check faster
expect_lt(t_discrete[3], t_orig[3])
#expect_lt(t_discrete[3], t_orig[3])

## check that the (my_kstar, my_kstar) constraint is satisfied
expect_equal(pseudohuber_median(out_discrete$B[my_kstar, -my_jstar]),
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-get_G_for_augmentations.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ test_that("get_G_for_augmentations_fast works and is faster than get_G_for_augme
end2 <- proc.time() - start2

expect_true(all.equal(g1, g2))
expect_true(end2[3] < end1[3])
#expect_true(end2[3] < end1[3])
})

97 changes: 0 additions & 97 deletions tests/testthat/test-macro_fisher_null.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,100 +353,3 @@ test_that("We take similar step as we'd take using numerical derivatives in a mo

})

test_that("We take same step as we'd take using numerical derivatives when gap, rho, u are zero", {
set.seed(59542234)
n <- 10
p <- 2
X <- cbind(1,rep(c(0,1),each = n/2))
J <- 10000
z <- rnorm(n) +8
b0 <- rnorm(J)
b1 <- seq(1,10,length.out = J)
b1 <- b1 - mean(b1)
b0 <- b0 - mean(b0)
b <- rbind(b0,b1)
Y <- matrix(NA,ncol = J, nrow = n)

k_constr <- 2
j_constr <- 1
p <- 2

constraint_fn <- rep(list(function(x){ pseudohuber_median(x,0.1)}), 2)
# constraint_fn <- function(x){mean(x)}

##### Arguments to fix:

constraint_grad_fn <- rep(list(function(x){dpseudohuber_median_dx(x,0.1)}), 2)
# constraint_grad_fn <- function(x){ rep(1/length(x), length(x))}
rho_init = 1
tau = 1.2
kappa = 0.8
obj_tol = 100
score_tol <- 1e-3
constraint_tol = 1e-5
init_tol = 1e6
c1 = 1e-4
maxit = 1000
inner_maxit = 25

Y[] <- 0
for(i in 1:n){
while(sum(Y[i,])==0){
for(j in 1:J){
temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
Y[i,j] <- rpois(1, lambda = temp_mean)
# Y[i,j] <- rnbinom(1,mu = temp_mean, size = 3)*rbinom(1,1,0.6)
}
}
}

j_ref <- 5

full_fit <- #suppressMessages(
emuFit_micro_penalized(X = X,
Y = Y,
B = NULL,
constraint_fn = rep(list(mean), 2),
tolerance = 10,
verbose = FALSE)#)

B <- full_fit$B
B[1,] <- B[1,] - B[1,j_ref]
B[2,] <- B[2,] - B[2,j_ref]

Y_aug <- full_fit$Y_augmented

B[k_constr,j_constr] <- constraint_fn[[k_constr]](B[k_constr,-j_constr])

u <- 0
rho <- 0

z <- update_z(Y,X,B)

macro_time <- try(system.time(macro_fisher_null(X = X,
Y = Y_aug,
B = B,
z = z,
J = J,
p = p,
k_constr = k_constr,
j_constr = j_constr,
j_ref = j_ref,
rho = rho,
u = u,
constraint_fn = constraint_fn,
constraint_grad_fn = constraint_grad_fn,
stepsize = 0.5,
c1 = 1e-4,
regularization = 0,
debug= FALSE)))

if(inherits(macro_time, "try-error")){
expect_true(FALSE)
}
expect_true(macro_time[3]<15)
#failing this test indicates that macro_fisher_null may be directly
#computing the full inverse of the (approximate) hessian matrix of the log likelihood
#this includes an outer product that does not need to be computed
})

Loading
Loading