diff --git a/DESCRIPTION b/DESCRIPTION index c62fef2..e565516 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,6 +23,7 @@ Depends: Imports: broom, dplyr, + effsize, limma, logistf, rlang, diff --git a/NAMESPACE b/NAMESPACE index f7a4613..d3c30b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,8 @@ importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) importFrom(dplyr,ungroup) +importFrom(effsize,cliff.delta) +importFrom(effsize,cohen.d) importFrom(limma,eBayes) importFrom(limma,lmFit) importFrom(logistf,logistf) diff --git a/R/getEffectSize.R b/R/getEffectSize.R new file mode 100644 index 0000000..fea254f --- /dev/null +++ b/R/getEffectSize.R @@ -0,0 +1,29 @@ +## Helper for effect size calculations + +#' Calculate effect sizes for features in a SummarizedExperiment +#' +#' Wrapper that computes per-feature effect sizes from long-format data. +#' +#' @param tse A SummarizedExperiment-like object. +#' @param formula A formula where LHS is feature/assay and RHS grouping. +#' @param method Character scalar, one of 'cliff' or 'cohen'. +#' @param pair.by Optional pairing column name (passed to .get_long_data). +#' @return A data.frame with a `rownames` column and method-specific columns. +#' @keywords internal +#' @noRd +getEffectSize <- function(tse, formula, method = c("cliff", "cohen"), + pair.by = NULL) { + method <- match.arg(method) + df <- .get_long_data(tse, formula, pair.by) + + if (exists(".calculate_effect_size", mode = "function")) { + return(.calculate_effect_size( + df = df, + formula = formula, + pair.by = pair.by, + effect_size = method + )) + } + + return(NULL) +} diff --git a/R/getWilcoxonTest.R b/R/getWilcoxonTest.R index 0d5a141..cd80f4a 100644 --- a/R/getWilcoxonTest.R +++ b/R/getWilcoxonTest.R @@ -10,9 +10,14 @@ #' grouping variable, e.g., \code{counts ~ Group}. #' @param pair.by \code{Character scalar} or \code{NULL}. Column for pairing #' samples in paired tests. (Default: \code{NULL}) +#' @param effect_size \code{Character scalar}. Effect size to compute. +#' Supported values are \code{"none"}, \code{"cliff"}, and \code{"cohen"}. +#' (Default: \code{"none"}) #' @param ... Additional arguments passed to \code{wilcox_test}. #' -#' @return A \code{data.frame} with test results. +#' @return A \code{data.frame} with test results. If +#' \code{effect_size = "cliff"} or \code{"cohen"} and the comparison is +#' unpaired with exactly two groups, output also includes effect size columns. #' #' @examples #' data(peerj13075, package = "mia") @@ -20,18 +25,49 @@ #' tse <- mia::agglomerateByRank(tse, "phylum") #' res <- getWilcoxonTest(tse, counts ~ Geographical_location) #' +#' @importFrom dplyr left_join #' @importFrom rstatix wilcox_test #' @export -getWilcoxonTest <- function(tse, formula, pair.by = NULL, ...) { +getWilcoxonTest <- function( + tse, formula, pair.by = NULL, + effect_size = c("none", "cliff", "cohen"), ... +) { if (length(all.vars(formula[[3]])) != 1L) { stop("Formula RHS must specify exactly one grouping variable.", call. = FALSE ) } + effect_size <- match.arg(effect_size) + df <- .get_long_data(tse, formula, pair.by) res <- .calculate_rstatix( df = df, formula = formula, pair.by = pair.by, FUN = wilcox_test, ... ) + + if (effect_size != "none") { + # Only compute effect sizes for unpaired two-group comparisons. + rhs <- .get_rhs(formula) + n_groups <- sum(!is.na(unique(df[[rhs]]))) + if (is.null(pair.by) && n_groups == 2L) { + # Pre-filter tse to only tested features to avoid computing effect sizes + # for features without test results. + if (length(res$rownames) > 0L) { + tse_sub <- tse[res$rownames, , drop = FALSE] + } else { + tse_sub <- tse[0, , drop = FALSE] + } + + delta_res <- getEffectSize( + tse = tse_sub, + formula = formula, + method = effect_size, + pair.by = pair.by + ) + if (!is.null(delta_res)) { + res <- left_join(res, delta_res, by = "rownames") + } + } + } return(res) } diff --git a/R/utils.R b/R/utils.R index eb157d4..23767a9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -176,6 +176,89 @@ return(res) } +#' Calculate effect size per feature from long-format data +#' +#' Currently supports generic effect-size output for unpaired two-group comparisons. +#' +#' @keywords internal +#' @noRd +#' @importFrom dplyr bind_rows n_distinct +#' @importFrom effsize cliff.delta cohen.d +.calculate_effect_size <- function(df, formula, pair.by = NULL, + effect_size = c("none", "cliff", "cohen")) { + effect_size <- match.arg(effect_size) + if (effect_size == "none") { + return(NULL) + } + + lhs <- .get_lhs(formula) + rhs <- .get_rhs(formula) + + if (nrow(df) == 0L) { + return(NULL) + } + + if (effect_size == "cliff") { + if (!is.null(pair.by)) { + return(NULL) + } + delta_res <- lapply(split(df, df$rownames), function(.x) { + .x <- .x[stats::complete.cases(.x[, c(lhs, rhs), drop = FALSE]), , + drop = FALSE + ] + if (nrow(.x) == 0L || n_distinct(.x[[rhs]], na.rm = TRUE) != 2L) { + return(NULL) + } + cd <- cliff.delta(formula = formula, data = .x) + data.frame( + effect_size = unname(cd$estimate), + effect_size_lower = cd$conf.int[1], + effect_size_upper = cd$conf.int[2], + effect_size_magnitude = unname(cd$magnitude) + ) + }) + # remove NULLs and bind results + delta_res <- Filter(Negate(is.null), delta_res) + if (length(delta_res) == 0L) { + return(NULL) + } + delta_res <- bind_rows(delta_res, .id = "rownames") + return(delta_res) + } + + if (effect_size == "cohen") { + if (!is.null(pair.by)) { + return(NULL) + } + res_list <- lapply(split(df, df$rownames), function(.x) { + .x <- .x[stats::complete.cases(.x[, c(lhs, rhs), drop = FALSE]), , + drop = FALSE + ] + if (nrow(.x) == 0L || n_distinct(.x[[rhs]], na.rm = TRUE) < 2L) { + return(NULL) + } + cd <- tryCatch(effsize::cohen.d(formula = formula, data = .x), + error = function(e) NULL + ) + if (is.null(cd)) { + return(NULL) + } + est <- if (!is.null(cd$estimate)) unname(cd$estimate) else NA_real_ + mag <- if (!is.null(cd$magnitude)) unname(cd$magnitude) else NA_character_ + data.frame( + effect_size = est, + effect_size_magnitude = mag, + stringsAsFactors = FALSE + ) + }) + res_list <- Filter(Negate(is.null), res_list) + if (length(res_list) == 0L) { + return(NULL) + } + return(bind_rows(res_list, .id = "rownames")) + } +} + ################################################################################ # Wide-format data helper ################################################################################ @@ -241,7 +324,7 @@ #' @importFrom rlang .data #' @importFrom stats p.adjust .train_model_per_feature <- function(formula, mat, metadata, FUN, - p_adjust_method = "BH") { + p_adjust_method = "BH") { feature_df <- mat |> t() |> as.data.frame() diff --git a/man/getWilcoxonTest.Rd b/man/getWilcoxonTest.Rd index 0dfb710..2807b8c 100644 --- a/man/getWilcoxonTest.Rd +++ b/man/getWilcoxonTest.Rd @@ -4,7 +4,13 @@ \alias{getWilcoxonTest} \title{Perform Wilcoxon rank-sum test} \usage{ -getWilcoxonTest(tse, formula, pair.by = NULL, ...) +getWilcoxonTest( + tse, + formula, + pair.by = NULL, + effect_size = c("none", "cliff", "cohen"), + ... +) } \arguments{ \item{tse}{A \code{SummarizedExperiment} object.} @@ -16,10 +22,16 @@ grouping variable, e.g., \code{counts ~ Group}.} \item{pair.by}{\code{Character scalar} or \code{NULL}. Column for pairing samples in paired tests. (Default: \code{NULL})} +\item{effect_size}{\code{Character scalar}. Effect size to compute. +Supported values are \code{"none"}, \code{"cliff"}, and \code{"cohen"}. +(Default: \code{"none"})} + \item{...}{Additional arguments passed to \code{wilcox_test}.} } \value{ -A \code{data.frame} with test results. +A \code{data.frame} with test results. If +\code{effect_size = "cliff"} or \code{"cohen"} and the comparison is +unpaired with exactly two groups, output also includes effect size columns. } \description{ Performs Wilcoxon rank-sum test (Mann-Whitney U) to compare values between diff --git a/tests/testthat/test-getWilcoxonTest.R b/tests/testthat/test-getWilcoxonTest.R index fb74c18..a2963da 100644 --- a/tests/testthat/test-getWilcoxonTest.R +++ b/tests/testthat/test-getWilcoxonTest.R @@ -8,9 +8,56 @@ test_that("getWilcoxonTest with assay works", { res <- getWilcoxonTest(tse, counts ~ Geographical_location) expect_s3_class(res, "data.frame") expect_true("p" %in% names(res)) + expect_false("effect_size" %in% names(res)) }) test_that("getWilcoxonTest with colData variable works", { res <- getWilcoxonTest(tse, shannon ~ Geographical_location) expect_s3_class(res, "data.frame") + expect_false("effect_size" %in% names(res)) +}) + +test_that("getWilcoxonTest adds effect size for unpaired two-group comparisons", { + tse_two <- tse[, tse$Geographical_location %in% c("Ahmednagar", "Nashik")] + res <- getWilcoxonTest( + tse_two, + counts ~ Geographical_location, + effect_size = "cliff" + ) + expect_s3_class(res, "data.frame") + expect_true(all(c( + "effect_size", + "effect_size_lower", + "effect_size_upper", + "effect_size_magnitude" + ) %in% names(res))) +}) + +test_that("getWilcoxonTest adds Cohen's d for unpaired two-group comparisons", { + tse_two <- tse[, tse$Geographical_location %in% c("Ahmednagar", "Nashik")] + res <- getWilcoxonTest( + tse_two, + counts ~ Geographical_location, + effect_size = "cohen" + ) + expect_s3_class(res, "data.frame") + expect_true(all(c( + "effect_size", + "effect_size_magnitude" + ) %in% names(res))) +}) + +test_that("getWilcoxonTest does not add Cliff's delta for paired tests", { + tse_two <- tse[, tse$Geographical_location %in% c("Ahmednagar", "Nashik")] + tse_two$pair_id <- seq_len(ncol(tse_two)) + + res <- getWilcoxonTest( + tse_two, + counts ~ Geographical_location, + pair.by = "pair_id", + effect_size = "cliff" + ) + + expect_s3_class(res, "data.frame") + expect_false("effect_size" %in% names(res)) })