diff --git a/DESCRIPTION b/DESCRIPTION index 627f9d5..2b8a01a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,8 +14,6 @@ RoxygenNote: 7.3.3 Depends: R (>= 3.5.0), cir, - fastnnls, - logsum, numDeriv, parallel, stats @@ -25,6 +23,8 @@ Suggests: knitr, phyloseq, rmarkdown, - testthat (>= 3.0.0) + phyloseq, + testthat (>= 3.0.0), + tidyverse Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index d43b040..2a073fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,5 @@ export(bootstrap_lrt) export(estimate_parameters) export(evaluate_criterion_lr) import(cir) -import(fastnnls) -import(logsum) import(parallel) import(stats) diff --git a/R/bootstrap_ci.R b/R/bootstrap_ci.R index b4319d5..6522b73 100644 --- a/R/bootstrap_ci.R +++ b/R/bootstrap_ci.R @@ -4,6 +4,8 @@ #' @import parallel #' #' +#' +#' #' @export bootstrap_ci <- function(W, fitted_model, diff --git a/R/calculate_log_penalty.R b/R/calculate_log_penalty.R index a27be49..a2a4a96 100644 --- a/R/calculate_log_penalty.R +++ b/R/calculate_log_penalty.R @@ -9,7 +9,6 @@ #' treated as known #' @param barrier_t The current value of t, the barrier penalty parameter #' -#' @import logsum #' #' @return The calculated value of the barrier penalty calculate_log_penalty <- function(varying_lr_df, @@ -22,13 +21,13 @@ calculate_log_penalty <- function(varying_lr_df, log_P <- lapply(which_rho_k, function(k) (varying_lr_df$value[varying_lr_df$param == "rho" & varying_lr_df$k == k]) %>% - (function(x) c(x,0) - logsum::sum_of_logs(c(x,0)))) + (function(x) c(x,0) - sum_of_logs(c(x,0)))) log_P_tilde <- lapply(which_rho_tilde_k, function(k) (varying_lr_df$value[ varying_lr_df$param == "rho_tilde" & varying_lr_df$k == k]) %>% - (function(x) c(x,0) - logsum::sum_of_logs(c(x,0)))) + (function(x) c(x,0) - sum_of_logs(c(x,0)))) log_ra_sum <- do.call(sum,log_P) + do.call(sum,log_P_tilde) return((-1/barrier_t)*log_ra_sum) diff --git a/R/fastnnls.R b/R/fastnnls.R new file mode 100644 index 0000000..6a7e400 --- /dev/null +++ b/R/fastnnls.R @@ -0,0 +1,335 @@ +fast_nnls <- function(ZTx, + ZTZ, + active = NULL, + unconstrained = NULL, + tolerance = 1e-10){ + # store number of covariates + p <- nrow(ZTZ) + + if(length(intersect(active,unconstrained)) >0){ + stop("A variable cannot be assigned to the unconstrained set and to the active set.") + } + ### getting stuck in this initial loop for some reason! check it out! + if(!is.null(active)){ + + #set d = 0 to start + d <- matrix(0,nrow = p,ncol = 1) + #check appropriateness of initial active set + s <- matrix(0,nrow = p,ncol = 1) + not_active <- !(1:p %in% active) + s_passive <- qr.solve(ZTZ[not_active,not_active,drop = F],ZTx[not_active,drop = F],tol = 1e-14) + s[not_active,] <- s_passive + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + #Enter inner loop + if(length(s_passive) >0){ + while(ifelse(length(s_passive)>0,min(s_passive)<= 0,FALSE)){ + + alphas <- (-(d[not_active]) / (d[not_active] - s_passive)) + alphas <- alphas[s_passive<0] + alpha <- min(-alphas) + + + d_archive <- d + d <- d + alpha*(s - d) + stopifnot(sum(is.na(d))==0) + + if(is.null(unconstrained)){ + active <- (1:p)[d==0] + not_active <- !(1:p %in% active) + } + if(!is.null(unconstrained)){ + active <- (1:p)[(d==0)&(!((1:p)%in%unconstrained))] + not_active <- !(1:p %in% active) + } + + + s <- matrix(0,nrow = p) + if(length(active)>0){ + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + }} + d <- s + active <- (1:p)[d==0] + + w <- ZTx - ZTZ%*%d + } + if(is.null(active)){ + if(is.null(unconstrained)){ + active <- 1:p + d <- matrix(0,nrow = p, ncol = 1) + w <- ZTx + } else{ + active <- (1:p)[!(1:p %in% unconstrained)] + d <- matrix(0,nrow = p, ncol = 1) + d[unconstrained] <- + qr.solve(ZTZ[unconstrained,unconstrained],ZTx[unconstrained,], + tol = 1e-14) + w <- (ZTx - ZTZ%*%d) + } + } + + # counter <- 0 + #Enter main loop - ???? are loop conditions correct ??? + while( (ifelse(length(active) >0,max(w[active]), -1)>tolerance) ){ + # counter <- counter + 1 + # print(counter) + + to_remove <- which.max(w[active]) + # print(to_remove) + active <- active[-to_remove] + + + if(length(active)>0){ + s <- matrix(0,nrow = p) + not_active <- !(1:p %in% active) + + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + #Enter inner loop + if(length(s_passive) >0){ + while(ifelse(length(s_passive)>0,min(s_passive)<= 0,FALSE)){ + + alphas <- (-(d[not_active]) / (d[not_active] - s_passive)) + alphas <- alphas[s_passive<0] + alpha <- min(-alphas) + + + d_archive <- d + d <- d + alpha*(s - d) + stopifnot(sum(is.na(d))==0) + + if(is.null(unconstrained)){ + active <- (1:p)[d==0] + not_active <- !(1:p %in% active) + } + if(!is.null(unconstrained)){ + active <- (1:p)[(d==0)&(!((1:p)%in%unconstrained))] + not_active <- !(1:p %in% active) + } + + s <- matrix(0,nrow = p) + if(length(active)>0){ + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + }} + + d <- s + + stopifnot(sum(is.na(d))==0) + + + w <- (ZTx - ZTZ%*%d) + + # counter <- counter + 1 + + # print(counter) + # stopifnot(counter<13) + + + + } + + return(d) +} + +fast_nnls_Matrix <- function(ZTx, + ZTZ, + active = NULL, + unconstrained = NULL, + tolerance = 1e-10){ + # store number of covariates + p <- nrow(ZTZ) + + if(length(intersect(active,unconstrained)) >0){ + stop("A variable cannot be assigned to the unconstrained set and to the active set.") + } + ### getting stuck in this initial loop for some reason! check it out! + if(!is.null(active)){ + + #set d = 0 to start + d <- matrix(0,nrow = p,ncol = 1) + #check appropriateness of initial active set + s <- matrix(0,nrow = p,ncol = 1) + not_active <- !(1:p %in% active) + s_passive <- qr.solve(ZTZ[not_active,not_active,drop = F],ZTx[not_active,drop = F],tol = 1e-14) + s[not_active,] <- s_passive + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + #Enter inner loop + if(length(s_passive) >0){ + while(ifelse(length(s_passive)>0,min(s_passive)<= 0,FALSE)){ + + alphas <- (-(d[not_active]) / (d[not_active] - s_passive)) + alphas <- alphas[s_passive<0] + alpha <- min(-alphas) + + + d_archive <- d + d <- d + alpha*(s - d) + stopifnot(sum(is.na(d))==0) + + if(is.null(unconstrained)){ + active <- (1:p)[d==0] + not_active <- !(1:p %in% active) + } + if(!is.null(unconstrained)){ + active <- (1:p)[(d==0)&(!((1:p)%in%unconstrained))] + not_active <- !(1:p %in% active) + } + + + s <- matrix(0,nrow = p) + if(length(active)>0){ + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + }} + d <- s + active <- (1:p)[d==0] + + w <- ZTx - ZTZ%*%d + } + if(is.null(active)){ + if(is.null(unconstrained)){ + active <- 1:p + d <- matrix(0,nrow = p, ncol = 1) + w <- ZTx + } else{ + active <- (1:p)[!(1:p %in% unconstrained)] + d <- matrix(0,nrow = p, ncol = 1) + d[unconstrained] <- + qr.solve(ZTZ[unconstrained,unconstrained],ZTx[unconstrained,], + tol = 1e-14) + w <- (ZTx - ZTZ%*%d) + } + } + + # counter <- 0 + #Enter main loop - ???? are loop conditions correct ??? + while( (ifelse(length(active) >0,max(w[active]), -1)>tolerance) ){ + # counter <- counter + 1 + # print(counter) + + to_remove <- which.max(w[active]) + # print(to_remove) + active <- active[-to_remove] + + + if(length(active)>0){ + s <- matrix(0,nrow = p) + not_active <- !(1:p %in% active) + + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + #Enter inner loop + if(length(s_passive) >0){ + while(ifelse(length(s_passive)>0,min(s_passive)<= 0,FALSE)){ + + alphas <- (-(d[not_active]) / (d[not_active] - s_passive)) + alphas <- alphas[s_passive<0] + alpha <- min(-alphas) + + + d_archive <- d + d <- d + alpha*(s - d) + stopifnot(sum(is.na(d))==0) + + if(is.null(unconstrained)){ + active <- (1:p)[d==0] + not_active <- !(1:p %in% active) + } + if(!is.null(unconstrained)){ + active <- (1:p)[(d==0)&(!((1:p)%in%unconstrained))] + not_active <- !(1:p %in% active) + } + + s <- matrix(0,nrow = p) + if(length(active)>0){ + s_passive <- qr.solve(ZTZ[not_active,not_active],ZTx[not_active,],tol = 1e-14) + s[not_active,] <- s_passive + } else{ + s <- s_passive <- qr.solve(ZTZ,ZTx,tol = 1e-14) + } + + ### redefine not_active to excluded unconstrained variables (if included) + if(!is.null(unconstrained)){ + not_active <- !(1:p %in% union(active,unconstrained)) + s_passive <- s[not_active] + } + + }} + + d <- s + + stopifnot(sum(is.na(d))==0) + + + w <- (ZTx - ZTZ%*%d) + + # counter <- counter + 1 + + # print(counter) + # stopifnot(counter<13) + + + + } + + return(d) +} diff --git a/R/logsum.R b/R/logsum.R new file mode 100644 index 0000000..5d98936 --- /dev/null +++ b/R/logsum.R @@ -0,0 +1,74 @@ +#' Sum positive numbers via their logarithms +#' Given x and y, this function returns log(exp(x) + exp(y)). +#' +#' @author David Clausen +#' +#' @param x the thing to be summed +#' +sum_of_logs <- function(x, y = NULL){ #calculates log(sum(exp(x))) + + if(is.null(y)){ + if(length(x) ==1){ + return(x) + } else{ + if(sum(!is.infinite(x)) == 0){ + if(sum(sign(x) != -1) == 0){ + return(-Inf) + } else{ + return(Inf) + } + } + x <- x[order(x, decreasing = TRUE)] + return(x[1] + log(1 + sum(exp(x[2:length(x)] - x[1]))))} + } else{ + if(length(x) != length(y)){ + stop("If y is provided, x and y must be the same length") + } + n <- length(x) + maxes <- pmax(x,y) + mins <- pmin(x,y) + return(maxes + log(1 + exp(mins - maxes))) + } +} + + + +diff_of_logs <- function(larger, smaller){ + return(larger + log( 1 - exp(smaller - larger) ) ) +} + + +signed_log_sum <- function(log_magnitudes, + signs){ + # if no nonzero summands, return zero + if(sum(signs != 0) == 0){ + return(c(0,0)) + } + + # if no negative summands, return sum of positive summands + if(sum(signs == -1) == 0){ + return(c(sum_of_logs(log_magnitudes[signs == 1]),1)) + } + + # if no positive summands, return sum of negative summands (with sign -1) + if(sum(signs == 1) == 0){ + return(c(sum_of_logs(log_magnitudes[signs == -1]),-1)) + } + + # otherwise sum positive and negative magnitudes separately + positives <- log_magnitudes[signs == 1] + negatives <- log_magnitudes[signs == -1] + + pos_logsum <- sum_of_logs(positives) + neg_logsum <- sum_of_logs(negatives) + + # if positive magnitude equals negative magnitude, return zero + if(pos_logsum == neg_logsum){ + return(c(0,0)) + } + + # otherwise return difference of positive and negative terms with appropriate sign + return(c(diff_of_logs(max(pos_logsum,neg_logsum), + min(pos_logsum, neg_logsum)), + ifelse(pos_logsum > neg_logsum, 1, -1))) +} \ No newline at end of file diff --git a/R/simpl_auglag_fnnls.R b/R/simpl_auglag_fnnls.R index ad43275..7f55e22 100644 --- a/R/simpl_auglag_fnnls.R +++ b/R/simpl_auglag_fnnls.R @@ -1,4 +1,3 @@ -#' @import fastnnls #' #' simpl_auglag_fnnls <- function(x, @@ -52,7 +51,7 @@ simpl_auglag_fnnls <- function(x, } ## END Amy July 8 2023 - x <- fastnnls::fast_nnls(ZTx = Ab, ZTZ = ATA, + x <- fast_nnls(ZTx = Ab, ZTZ = ATA, tolerance = constraint_tolerance) V <- abs(sum(x) - 1) diff --git a/README.Rmd b/README.Rmd index a5a0577..79f8e2a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -37,15 +37,6 @@ You can install the development version of tinyvamp from [GitHub](https://github remotes::install_github("statdivlab/tinyvamp") ``` -If you haven't already, you may need to install `fastnnls` and `logsum`, too: - -``` r -# install.packages("remotes") -remotes::install_github("ailurophilia/fastnnls") -remotes::install_github("ailurophilia/logsum") -remotes::install_github("statdivlab/tinyvamp") -``` - diff --git a/README.md b/README.md index 628eca3..b2ebfdc 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ # tinyvamp + [![Coverage @@ -33,21 +34,16 @@ You can install the development version of tinyvamp from remotes::install_github("statdivlab/tinyvamp") ``` -If you haven’t already, you may need to install `fastnnls` and `logsum`, -too: - -``` r -# install.packages("remotes") -remotes::install_github("ailurophilia/fastnnls") -remotes::install_github("ailurophilia/logsum") -remotes::install_github("statdivlab/tinyvamp") -``` - + + + + + ## Humans diff --git a/data/karstens.rdata b/data/karstens.rdata deleted file mode 100644 index 82718a7..0000000 Binary files a/data/karstens.rdata and /dev/null differ diff --git a/man/evaluate_criterion_lr.Rd b/man/evaluate_criterion_lr.Rd new file mode 100644 index 0000000..b6e64a5 --- /dev/null +++ b/man/evaluate_criterion_lr.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluate_criterion_lr.R +\name{evaluate_criterion_lr} +\alias{evaluate_criterion_lr} +\title{Criterion Evaluation Function} +\usage{ +evaluate_criterion_lr( + W, + X, + Z, + Z_tilde, + Z_tilde_gamma_cols, + Z_tilde_list = NULL, + X_tilde, + fixed_df, + varying_df, + varying_lr_df = NULL, + barrier_t = NULL, + criterion = "Poisson", + lr_scale = TRUE, + include_log_penalty = TRUE, + wts = NULL, + gmm_inv_wts = NULL, + return_gmm_inv_weights = FALSE +) +} +\description{ +Criterion Evaluation Function +} diff --git a/man/sum_of_logs.Rd b/man/sum_of_logs.Rd new file mode 100644 index 0000000..79ac1eb --- /dev/null +++ b/man/sum_of_logs.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logsum.R +\name{sum_of_logs} +\alias{sum_of_logs} +\title{Sum positive numbers via their logarithms +Given x and y, this function returns log(exp(x) + exp(y)).} +\usage{ +sum_of_logs(x, y = NULL) +} +\arguments{ +\item{x}{the thing to be summed} +} +\description{ +Sum positive numbers via their logarithms +Given x and y, this function returns log(exp(x) + exp(y)). +} +\author{ +David Clausen +} diff --git a/tests/testthat/test-calculate_log_penalty.R b/tests/testthat/test-calculate_log_penalty.R index 3fb077d..6d96298 100644 --- a/tests/testthat/test-calculate_log_penalty.R +++ b/tests/testthat/test-calculate_log_penalty.R @@ -1,6 +1,4 @@ test_that("log penalty is correct", { - require(logsum) - P <- matrix(1/7,ncol = 7, nrow = 1) P_tilde <- matrix(1/7,ncol = 7, nrow = 1) diff --git a/tests/testthat/test-do_one_simulation.R b/tests/testthat/test-do_one_simulation.R index ec1593c..4ccf1ca 100644 --- a/tests/testthat/test-do_one_simulation.R +++ b/tests/testthat/test-do_one_simulation.R @@ -8,6 +8,7 @@ test_that("do_one_simulation returns list of lists", { seed = 1, label = "test", n_boot = the_n_boot, + parallelize=FALSE, verbose =FALSE, folder_name = "test", return_dont_save = TRUE) diff --git a/vignettes/compare-experiments.Rmd b/vignettes/compare-experiments.Rmd index 8de34bf..d1d336a 100644 --- a/vignettes/compare-experiments.Rmd +++ b/vignettes/compare-experiments.Rmd @@ -29,7 +29,7 @@ Because of its flexibility, fitting a model using `tinyvamp` involves specifying ## Setup -We will start by loading the relevant packages. You might need to install `logsum` and `fastnnls` if you haven't already -- you can do this with `remotes::install_github("ailurophilia/logsum")` and `remotes::install_github("ailurophilia/fastnnls")`. +We will start by loading the relevant packages. ```{r setup} library(tidyverse) diff --git a/vignettes/dilution-series.Rmd b/vignettes/dilution-series.Rmd index 26ca777..1c504fa 100644 --- a/vignettes/dilution-series.Rmd +++ b/vignettes/dilution-series.Rmd @@ -25,12 +25,13 @@ In this vignette, we will walk through how to use `tinyvamp` to estimate detecti ## Setup -We will start by loading the relevant packages. You might need to install `logsum` and `fastnnls` if you haven't already -- you can do this with `remotes::install_github("ailurophilia/logsum")` and `remotes::install_github("ailurophilia/fastnnls")`. Also, `speedyseq` is awesome, and you can get it with `remotes::install_github("mikemc/speedyseq")`. +We will start by loading the relevant packages. We recommend using `speedyseq` instead of `phyloseq`. It's awesome, and you can get it with `remotes::install_github("mikemc/speedyseq")`. ```{r setup} library(tidyverse) library(tinyvamp) -library(speedyseq) +library(phyloseq) +# library(speedyseq) ``` Now let's load the relevant data and inspect it. This data is from [Karstens et al. (2019)](https://journals.asm.org/doi/full/10.1128/mSystems.00290-19?rfr_dat=cr_pub++0pubmed&url_ver=Z39.88-2003&rfr_id=ori%3Arid%3Acrossref.org), who generated 9 samples via three-fold dilutions of a synthetic community containing 8 distinct strains of bacteria which each account for 12.5% of the DNA in the community. Despite only 8 strains being present in the synthetic community, 248 total strains were identified using DADA2 (see Section 12.1 of [our paper](https://arxiv.org/pdf/2204.12733.pdf) for details on data processing). @@ -49,7 +50,7 @@ karstens_phyloseq %>% sample_data We know which genera correspond to taxa in the mock, so let's create a vector telling us which rows of the ASV table contain data on the mock taxa ```{r} -genera_data <- karstens_phyloseq %>% tax_table %>% as_tibble %>% select("Genus") %>% pull +genera_data <- karstens_phyloseq %>% tax_table %>% as.data.frame %>% as_tibble %>% select("Genus") %>% pull Pseudomonas <- genera_data %>% str_detect("Pseudomonas") %>% which Escherichia <- genera_data %>% str_detect("Escherichia") %>% which Salmonella <- genera_data %>% str_detect("Salmonella") %>% which @@ -243,7 +244,8 @@ full_karstens_model$varying %>% as_tibble Wow, `r full_karstens_model$varying %>% as_tibble %>% nrow` parameters were estimated pretty quickly! That's thanks to *a lot* of hard work and cleverness on David's part. Lets look specifically at the estimated efficiencies, and I will join it to the taxon names data to make it easier to interpret ```{r, message=FALSE} -full_karstens_model$varying %>% as_tibble %>% filter(param == "B") %>% +estd_bs <- full_karstens_model$varying %>% as_tibble %>% dplyr::filter(param == "B") +estd_bs %>% inner_join(tibble(j=241:247, name = genera_data[mock_taxa][-8]), by="j") ```