From 7472427f84bfdee5672354cb938b0aea4346ee6b Mon Sep 17 00:00:00 2001 From: Anjing Date: Wed, 25 Feb 2026 21:09:17 -0500 Subject: [PATCH 1/4] save phenotype_id in vqtl analysis --- R/quail_vqtl.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R index 7649c624..1267845e 100644 --- a/R/quail_vqtl.R +++ b/R/quail_vqtl.R @@ -99,7 +99,7 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE, #' phenotype <- rnorm(1000) #' results <- run_linear_regression1(genotype, phenotype) #' @noRd -run_linear_regression <- function(genotype, phenotype, covariates = NULL) { +run_linear_regression <- function(genotype, phenotype, covariates = NULL, phenotype_id = NULL) { if (!is.null(covariates)) { covariates <- as.data.frame(lapply(covariates, as.numeric)) } @@ -115,11 +115,12 @@ run_linear_regression <- function(genotype, phenotype, covariates = NULL) { snp_info <- lapply(colnames(genotype), parse_snp_info) data.frame( + phenotype_id = if (!is.null(phenotype_id)) phenotype_id else NA, chr = sapply(snp_info, function(x) x$chr), pos = sapply(snp_info, function(x) x$pos), - variant_id = colnames(genotype), alt = sapply(snp_info, function(x) x$alt), - ref = sapply(snp_info, function(x) x$ref), + ref = sapply(snp_info, function(x) x$ref), + variant_id = colnames(genotype), beta = reg_results$betahat, se = reg_results$sebetahat, z = reg_results$z_scores, @@ -143,7 +144,7 @@ run_linear_regression <- function(genotype, phenotype, covariates = NULL) { #' results <- QUAIL_pipeline(genotype, rank_score, covariates = covariates) #' } QUAIL_pipeline <- function(genotype, rank_score, phenotype = NULL, - covariates = NULL) { + covariates = NULL, phenotype_id = NULL) { start_time <- Sys.time() # Validate rank_score @@ -160,7 +161,7 @@ QUAIL_pipeline <- function(genotype, rank_score, phenotype = NULL, } # Perform vQTL analysis - vqtl_results <- run_linear_regression(genotype, rank_score, covariates) + vqtl_results <- run_linear_regression(genotype, rank_score, covariates, phenotype_id = phenotype_id) end_time <- Sys.time() cat("\nTotal vQTL runtime:", round(difftime(end_time, start_time, units = "secs"), 2), " seconds\n") From 30e6f72971b6e02a7e6d63fe66352a207baa17ae Mon Sep 17 00:00:00 2001 From: Anjing Date: Wed, 25 Feb 2026 21:13:27 -0500 Subject: [PATCH 2/4] qqtl and qtwas pipeline modification --- R/quantile_twas_weight.R | 167 ++++++++++++++++++++++++++++++++------- 1 file changed, 139 insertions(+), 28 deletions(-) diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R index dbcd1b88..267c6fdb 100644 --- a/R/quantile_twas_weight.R +++ b/R/quantile_twas_weight.R @@ -98,6 +98,12 @@ qr_screen <- function( stop("Invalid screen_method. Choose 'fdr' or 'qvalue'.") } + # Ensure quantile_adjusted_pvalues is always a matrix (handles single-variant case) + if (!is.matrix(quantile_adjusted_pvalues)) { + quantile_adjusted_pvalues <- matrix(quantile_adjusted_pvalues, nrow = 1, + dimnames = list(colnames(X), names(quantile_adjusted_pvalues))) + } + sig_SNP_threshold <- which(adjusted_pvalues < screen_threshold) sig_SNP_top_count <- order(adjusted_pvalues)[1:top_count] sig_SNP_top_percent <- order(adjusted_pvalues)[1:max(1, round(length(adjusted_pvalues) * top_percent / 100))] @@ -703,6 +709,69 @@ calculate_coef_heterogeneity <- function(rq_coef_result) { return(heterogeneity_result) } +#' Calculate Xi Correlation for QR Coefficients +#' +#' This function calculates the xi correlation coefficient and p-value for each variant, +#' measuring the functional dependence between tau values and QR coefficients. +#' Uses coef_qr_0.1 to coef_qr_0.9 (17 values, excluding 0.05 and 0.95). +#' +#' @param rq_coef_result Data frame containing variant_id and coef_qr_* columns +#' @param tau_range Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05)) +#' @param min_valid Minimum number of valid (non-NA) coefficients required (default: 10) +#' @return A data frame with variant_id, xi, and xi_pval columns +#' @importFrom XICOR xicor +#' @export +calculate_xi_correlation <- function(rq_coef_result, tau_range = seq(0.1, 0.9, by = 0.05), min_valid = 10) { + # Build column names for the specified tau range + coef_col_names <- paste0("coef_qr_", tau_range) + + # Check which columns exist + existing_cols <- coef_col_names[coef_col_names %in% colnames(rq_coef_result)] + + if (length(existing_cols) == 0) { + warning("No coef_qr columns found in the specified tau range") + return(data.frame( + variant_id = rq_coef_result$variant_id, + xi = NA_real_, + xi_pval = NA_real_, + stringsAsFactors = FALSE + )) + } + + # Extract tau values from existing columns + existing_tau <- as.numeric(gsub("coef_qr_", "", existing_cols)) + + # Calculate xi for each variant + xi_results <- apply(rq_coef_result[, existing_cols, drop = FALSE], 1, function(coef_values) { + # Get valid (non-NA) coefficients + valid_indices <- !is.na(coef_values) + valid_coefs <- as.numeric(coef_values[valid_indices]) + valid_tau <- existing_tau[valid_indices] + + # Check if enough valid values + if (length(valid_coefs) < min_valid) { + return(c(xi = NA_real_, xi_pval = NA_real_)) + } + + # Calculate xi correlation + tryCatch({ + xicor_result <- XICOR::xicor(valid_tau, y = valid_coefs, pvalue = TRUE, method = "asymptotic") + return(c(xi = xicor_result$xi, xi_pval = xicor_result$pval)) + }, error = function(e) { + return(c(xi = NA_real_, xi_pval = NA_real_)) + }) + }) + + # Convert to data frame + xi_df <- data.frame( + variant_id = rq_coef_result$variant_id, + xi = xi_results["xi", ], + xi_pval = xi_results["xi_pval", ], + stringsAsFactors = FALSE + ) + + return(xi_df) +} #' Quantile TWAS Weight Pipeline #' @@ -743,10 +812,15 @@ calculate_coef_heterogeneity <- function(rq_coef_result) { #' #' @export quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id = "", - ld_reference_meta_file = NULL, twas_maf_cutoff = 0.01, ld_pruning = FALSE, + ld_reference_meta_file = NULL, twas_maf_cutoff = 0.01, + ld_clumping = FALSE, ld_pruning = FALSE, + screen_significant = FALSE, quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05), quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01), - screen_threshold = 0.05) { + screen_threshold = 0.05, + xi_tau_range = seq(0.1, 0.9, by = 0.05), + keep_variants = NULL) { + # Step 1: vQTL # Step 1-1: Calculate vQTL rank scores message("Step 0: Calculating vQTL rank scores for region ", region_id) num_tau_levels <- length(quantile_qtl_tau_list) # Convert tau.list to numeric count @@ -759,16 +833,17 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id ) message("vQTL rank scores calculated.") - # Step 1-1: Run vQTL pipeline + # Step 1-2: Run vQTL pipeline message("Step 0.5: Running vQTL analysis for rank scores in region ", region_id) vqtl_results <- QUAIL_pipeline( genotype = X, rank_score = rank_score, - covariates = Z + covariates = Z, + phenotype_id = colnames(Y)[1] ) message("vQTL analysis completed. Proceeding to QR screen.") - # Step 1-2: QR screen + # Step 2: QR screen message("Starting QR screen for region ", region_id) p.screen <- qr_screen(X = X, Y = Y, Z = Z, tau.list = quantile_qtl_tau_list, screen_threshold = screen_threshold, screen_method = "qvalue", top_count = 10, top_percent = 15) message(paste0("Number of SNPs after QR screening: ", length(p.screen$sig_SNP_threshold))) @@ -778,54 +853,79 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id qr_screen_pvalue_df = p.screen$df_result, vqtl_results = vqtl_results # Include vQTL results ) - if (length(p.screen$sig_SNP_threshold) == 0) { + if (screen_significant && length(p.screen$sig_SNP_threshold) == 0) { results$message <- paste0("No significant SNPs detected in region ", region_id) return(results) } - X_filtered <- X[, p.screen$sig_SNP_threshold, drop = FALSE] + if (screen_significant) { + X_filtered <- X[, p.screen$sig_SNP_threshold, drop = FALSE] + } else { + X_filtered <- X + } - # Step 2: LD clumping and pruning from results of QR_screen (using original QR screen results) - message("Performing LD clumping and pruning from QR screen results...") - LD_SNPs <- multicontext_ld_clumping(X = X[, p.screen$sig_SNP_threshold, drop = FALSE], qr_results = p.screen, maf_list = NULL) - selected_snps <- if (ld_pruning) LD_SNPs$final_SNPs else LD_SNPs$clumped_SNPs - x_clumped <- X[, p.screen$sig_SNP_threshold, drop = FALSE][, selected_snps, drop = FALSE] - # x_clumped <- X[, p.screen$sig_SNP_threshold, drop = FALSE][, LD_SNPs$final_SNPs, drop = FALSE] + # # Step 3: Optional LD clumping and pruning from results of QR_screen (using original QR screen results) + if (ld_clumping) { + message("Performing LD clumping and pruning from QR screen results...") + LD_SNPs <- multicontext_ld_clumping(X = X[, p.screen$sig_SNP_threshold, drop = FALSE], qr_results = p.screen, maf_list = NULL) + selected_snps <- if (ld_pruning) LD_SNPs$final_SNPs else LD_SNPs$clumped_SNPs + x_clumped <- X[, p.screen$sig_SNP_threshold, drop = FALSE][, selected_snps, drop = FALSE] + } else { + message("Skipping LD clumping.") + } - # Step 3: Only fit marginal QR to get beta with SNPs after LD pruning for quantile_qtl_tau_list values - message("LD clumping and pruning completed. Fitting marginal QR for selected SNPs...") - rq_coef_result <- perform_qr_analysis(X = x_clumped, Y = Y, Z = Z, tau_values = quantile_qtl_tau_list) + # Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values + message("Fitting marginal QR for selected SNPs...") + X_for_qr <- if (ld_clumping) x_clumped else X_filtered + if (!is.null(keep_variants)) { + variants_to_keep <- intersect(keep_variants, colnames(X_for_qr)) + if (length(variants_to_keep) > 0) { + X_for_qr <- X_for_qr[, variants_to_keep, drop = FALSE] + message("Filtered to ", ncol(X_for_qr), " variants from keep_variants list for QR analysis") + } else { + message("Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs") + } + } + rq_coef_result <- perform_qr_analysis(X = X_for_qr, Y = Y, Z = Z, tau_values = quantile_qtl_tau_list) - # Step 4: beta_heterogeneity in marginal model + # Step 5: Heterogeneity calculation + # Step 5-1: beta_heterogeneity index in marginal model message("Marginal QR for selected SNPs completed. Calculating beta heterogeneity...") beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result) message("Beta heterogeneity calculation completed.") + + # Step 5-2: Calculate xi correlation (Chatterjee correlation test) + message("Calculating xi correlation for QR coefficients...") + xi_correlation <- calculate_xi_correlation(rq_coef_result, tau_range = xi_tau_range, min_valid = 10) + message("Xi correlation calculation completed.") + + # Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order) + rq_coef_result <- rq_coef_result %>% + dplyr::left_join(xi_correlation, by = "variant_id") + results$rq_coef_df <- rq_coef_result results$beta_heterogeneity <- beta_heterogeneity - # Step 5: Optional LD panel filtering and MAF filtering from results of QR_screen + # Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen if (!is.null(ld_reference_meta_file)) { message("Starting LD panel filtering...") ld_result <- tryCatch( { variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file) - if (length(variants_kept$data) == 0) { - results$message <- paste0("No SNPs left after LD filtering in region ", region_id) - return(NULL) - } - return(variants_kept) + if (length(variants_kept$data) == 0) NULL else variants_kept }, error = function(e) { - results$message <- paste0("Error in LD filtering for region ", region_id, ": ", e$message) - return(NULL) + message("Error in LD filtering for region ", region_id, ": ", e$message) + NULL } ) if (is.null(ld_result)) { + results$message <- paste0("No SNPs left or error in LD filtering in region ", region_id) return(results) } - X_filtered <- X_filtered[, variants_kept$data, drop = FALSE] + X_filtered <- X_filtered[, ld_result$data, drop = FALSE] message(paste0("Number of SNPs after LD filtering: ", ncol(X_filtered))) # MAF filtering @@ -843,7 +943,18 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id } } - # Step 6: Filter highly correlated SNPs + # Step 7: Pre-filter variants by raw p_qr < 0.05 to reduce dimensionality + raw_pvals <- p.screen$pvec[colnames(X_filtered)] + sig_raw <- which(raw_pvals < 0.05) + if (length(sig_raw) > 0 && length(sig_raw) < ncol(X_filtered)) { + message("Pre-filtering variants by raw p_qr < 0.05: keeping ", length(sig_raw), " out of ", ncol(X_filtered), " variants") + X_filtered <- X_filtered[, sig_raw, drop = FALSE] + } else if (length(sig_raw) == 0) { + results$message <- paste0("No variants with raw p_qr < 0.05 in region ", region_id) + return(results) + } + + # Step 8: Filter highly correlated SNPs message("Filtering highly correlated SNPs...") if (ncol(X_filtered) > 1) { filtered <- corr_filter(X_filtered, 0.8) @@ -853,7 +964,7 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id results$message <- paste0("Skipping correlation filter because there is only one significant SNP in region ", region_id) } - # Step 7: Fit QR and get twas weight and R2 for all taus + # Step 9: Fit QR and get twas weight and R2 for all taus message("Filter highly correlated SNPs completed. Fitting full QR to calculate TWAS weights and pseudo R-squared values...") AssocData <- list(X = X, Y = Y, C = Z, X.filter = X.filter) qr_beta_R2_results <- calculate_qr_and_pseudo_R2(AssocData, quantile_twas_tau_list) From 0915d174eec77ebbd2d95b5a59840b0d8e600a99 Mon Sep 17 00:00:00 2001 From: Anjing Date: Wed, 25 Feb 2026 21:29:33 -0500 Subject: [PATCH 3/4] added XICOR package --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 01c55b3b..babf0589 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,6 +52,7 @@ Suggests: qgg, quadprog, quantreg, + XICOR, qvalue, rmarkdown, snpStats, From 301f76e48a55792292ddd2299eca0a7391e88a38 Mon Sep 17 00:00:00 2001 From: al4225 Date: Thu, 26 Feb 2026 02:41:17 +0000 Subject: [PATCH 4/4] Update documentation --- NAMESPACE | 2 ++ man/QUAIL_pipeline.Rd | 8 +++++++- man/calculate_xi_correlation.Rd | 27 +++++++++++++++++++++++++++ man/quantile_twas_weight_pipeline.Rd | 6 +++++- 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 man/calculate_xi_correlation.Rd diff --git a/NAMESPACE b/NAMESPACE index a8ed34c0..6c4f8cc3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(bayes_n_rss_weights) export(bayes_n_weights) export(bayes_r_rss_weights) export(bayes_r_weights) +export(calculate_xi_correlation) export(clean_context_names) export(coloc_post_processor) export(coloc_wrapper) @@ -131,6 +132,7 @@ importFrom(IRanges,reduce) importFrom(IRanges,start) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) +importFrom(XICOR,xicor) importFrom(bigsnpr,snp_clumping) importFrom(bigstatsr,FBM.code256) importFrom(coloc,coloc.bf_bf) diff --git a/man/QUAIL_pipeline.Rd b/man/QUAIL_pipeline.Rd index f4f8ffb3..51011d2a 100644 --- a/man/QUAIL_pipeline.Rd +++ b/man/QUAIL_pipeline.Rd @@ -5,7 +5,13 @@ \title{Main QUAIL pipeline QUAIL vQTL Analysis Pipeline} \usage{ -QUAIL_pipeline(genotype, rank_score, phenotype = NULL, covariates = NULL) +QUAIL_pipeline( + genotype, + rank_score, + phenotype = NULL, + covariates = NULL, + phenotype_id = NULL +) } \arguments{ \item{genotype}{numeric matrix (n x p) of genotypes.} diff --git a/man/calculate_xi_correlation.Rd b/man/calculate_xi_correlation.Rd new file mode 100644 index 00000000..782e09df --- /dev/null +++ b/man/calculate_xi_correlation.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quantile_twas_weight.R +\name{calculate_xi_correlation} +\alias{calculate_xi_correlation} +\title{Calculate Xi Correlation for QR Coefficients} +\usage{ +calculate_xi_correlation( + rq_coef_result, + tau_range = seq(0.1, 0.9, by = 0.05), + min_valid = 10 +) +} +\arguments{ +\item{rq_coef_result}{Data frame containing variant_id and coef_qr_* columns} + +\item{tau_range}{Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05))} + +\item{min_valid}{Minimum number of valid (non-NA) coefficients required (default: 10)} +} +\value{ +A data frame with variant_id, xi, and xi_pval columns +} +\description{ +This function calculates the xi correlation coefficient and p-value for each variant, +measuring the functional dependence between tau values and QR coefficients. +Uses coef_qr_0.1 to coef_qr_0.9 (17 values, excluding 0.05 and 0.95). +} diff --git a/man/quantile_twas_weight_pipeline.Rd b/man/quantile_twas_weight_pipeline.Rd index 1b879581..a02f7e93 100644 --- a/man/quantile_twas_weight_pipeline.Rd +++ b/man/quantile_twas_weight_pipeline.Rd @@ -12,10 +12,14 @@ quantile_twas_weight_pipeline( region_id = "", ld_reference_meta_file = NULL, twas_maf_cutoff = 0.01, + ld_clumping = FALSE, ld_pruning = FALSE, + screen_significant = FALSE, quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05), quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01), - screen_threshold = 0.05 + screen_threshold = 0.05, + xi_tau_range = seq(0.1, 0.9, by = 0.05), + keep_variants = NULL ) } \arguments{