From b85ca023d48599b80bd3e99525182f563fa545c0 Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Wed, 25 Feb 2026 15:07:02 -0500 Subject: [PATCH 1/3] Updates to remove unnecessary tests, add back collision detection and to properly pass off parameters to covariate builder functions --- R/GetCovariates.R | 6 -- R/GetDefaultCovariates.R | 21 ++--- R/UnitTestHelperFunctions.R | 3 +- tests/testthat/setup.R | 15 +++- tests/testthat/test-GetDefaultCovariates.R | 90 ---------------------- 5 files changed, 28 insertions(+), 107 deletions(-) diff --git a/R/GetCovariates.R b/R/GetCovariates.R index c3a03512..f2bf40f5 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -136,7 +136,6 @@ getDbCovariateData <- function(connectionDetails = NULL, cohortIds = c(-1), rowIdField = "subject_id", covariateSettings, - exportToTable = FALSE, createTable = exportToTable, dropTableIfExists = exportToTable, @@ -146,7 +145,6 @@ getDbCovariateData <- function(connectionDetails = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), @@ -350,15 +348,11 @@ getDbCovariateData <- function(connectionDetails = NULL, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = covariateSettings[[i]], - targetCovariateTable = targetCovariateTable, targetCovariateContinuousTable = targetCovariateContinuousTable, targetCovariateRefTable = targetCovariateRefTable, targetAnalysisRefTable = targetAnalysisRefTable, targetTimeRefTable = targetTimeRefTable, - dropTableIfExists = FALSE, # can remove this input - createTable = FALSE, # can remove this input - aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 6a7551ea..f2a46a2c 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -37,11 +37,6 @@ #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. -#' -#' -#' @param dropTableIfExists If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged -#' into existing table data. Overides createTable. -#' @param createTable Run sql to create table? Code does not check if table exists. #' @template GetCovarParams #' #' @examples @@ -74,15 +69,12 @@ getDbDefaultCovariateData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - - dropTableIfExists = FALSE, - createTable = TRUE, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { @@ -298,6 +290,17 @@ getDbDefaultCovariateData <- function(connection, andromedaTableName = "covariateRef", snakeCaseToCamelCase = TRUE ) + + collisions <- covariateData$covariateRef %>% + dplyr::filter(collisions > 0) %>% + dplyr::collect() + + if (nrow(collisions) > 0) { + warning(sprintf( + "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s", + paste(collisions$covariateId, paste = ", ") + )) + } } else{ sql <- " INSERT INTO @target_covariate_ref_table( diff --git a/R/UnitTestHelperFunctions.R b/R/UnitTestHelperFunctions.R index 205b598b..504a3f6b 100644 --- a/R/UnitTestHelperFunctions.R +++ b/R/UnitTestHelperFunctions.R @@ -71,7 +71,8 @@ rowIdField = "subject_id", covariateSettings, aggregated = FALSE, - minCharacterizationMean = 0) { + minCharacterizationMean = 0, + ...) { writeLines("Constructing length of observation covariates") if (covariateSettings$useLengthOfObs == FALSE) { return(NULL) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dd59bcfa..2007a292 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -2,6 +2,19 @@ library(testthat) library(FeatureExtraction) library(dplyr) +# AGS: This rJava code block was used to add the Java dependencies to the classpath for testing. It is needed to run the tests on all platforms, but it is not needed when running individual test files in RStudio, which is why it is not included in the helper functions file. If we want to run individual test files, we can use the loadRenderTranslateUnitTestSql function defined below, which also adds the Java dependencies to the classpath if they are not already there. +# library(rJava) +# .jinit() +# jar_dirs <- c( +# system.file("java", package = "DatabaseConnector"), +# system.file("java", package = "SqlRender"), +# system.file("java", package = "FeatureExtraction") +# ) +# jar_files <- unlist( +# lapply(jar_dirs, list.files, pattern = "\\.jar$", full.names = TRUE) +# ) +# .jaddClassPath(jar_files) + dbms <- getOption("dbms", default = "sqlite") message("************* Testing on ", dbms, " *************\n") @@ -98,7 +111,7 @@ checkRemoteFileAvailable <- function(remoteFile) { } # Then stop if status > 400 if (httr::http_error(resp)) { - message_for_status(resp) + httr::message_for_status(resp) return(NULL) } return("success") diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index 8c674acd..44118277 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -42,93 +42,3 @@ test_that("Test exit conditions", { )) }) -test_that("Test target table", { - skip_on_cran() - skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - targetTables = list(covariates = "ut_cov", - covariateRef = "ut_cov_ref", - analysisRef = "ut_cov_analysis_ref")) - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref")[1], 1) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - aggregated = TRUE, - targetTables = list(covariates = "ut_cov_agg", - covariateRef = "ut_cov_ref_agg", - analysisRef = "ut_cov_analysis_ref_agg")) - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) - - # Temp tables with old prototype - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - aggregated = TRUE, - targetCovariateTable = "#ut_cov_agg", - targetAnalysisRefTable = "#ut_cov_ref_agg", - targetCovariateRefTable = "#ut_cov_anal_ref_agg") - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_anal_ref_agg")[1], 1) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - covCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1] - expect_gt(covCt, 1) - covRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1] - expect_gt(covRefCt, 1) - anlRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1] - expect_gt(anlRefCt, 1) - - # append results rather than deleting the tables - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - createTable = FALSE, - dropTableIfExists = FALSE, - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt * 2) - - # Recreate tables (and check create override works) - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - createTable = FALSE, - dropTableIfExists = TRUE, - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt) -}) From ea0d107ead453e0a7dae4cf6d84ccd67244516c8 Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Wed, 25 Feb 2026 15:17:03 -0500 Subject: [PATCH 2/3] Update docs --- man/dot-getDbLooCovariateData.Rd | 3 ++- man/getDbDefaultCovariateData.Rd | 14 ++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/man/dot-getDbLooCovariateData.Rd b/man/dot-getDbLooCovariateData.Rd index ab94c1af..0c0f7db8 100644 --- a/man/dot-getDbLooCovariateData.Rd +++ b/man/dot-getDbLooCovariateData.Rd @@ -14,7 +14,8 @@ rowIdField = "subject_id", covariateSettings, aggregated = FALSE, - minCharacterizationMean = 0 + minCharacterizationMean = 0, + ... ) } \arguments{ diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index aa1ae71d..7a0557bf 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -14,13 +14,12 @@ getDbDefaultCovariateData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - dropTableIfExists = FALSE, - createTable = TRUE, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") @@ -60,6 +59,9 @@ is more than one period per person.} \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one of the createCovariate functions, or a list of such objects.} +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.} + \item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If @@ -71,11 +73,6 @@ it is a temp table, do not specify \code{targetDatabaseSchema}.} \item{targetTimeRefTable}{(Optional) The name of the table for the time reference} -\item{dropTableIfExists}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged -into existing table data. Overides createTable.} - -\item{createTable}{Run sql to create table? Code does not check if table exists.} - \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry?} @@ -86,9 +83,6 @@ on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} - -\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. From 13374935b63b6023bf3b94a342e9c5eb05e0b955 Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Wed, 25 Feb 2026 16:38:31 -0500 Subject: [PATCH 3/3] Fix parameter documentation and example --- R/GetCovariatesFromOtherCohorts.R | 2 ++ R/GetDefaultCovariates.R | 6 ++---- R/UnitTestHelperFunctions.R | 1 + man/dot-getDbLooCovariateData.Rd | 2 ++ man/getDbCohortBasedCovariatesData.Rd | 9 ++++++--- man/getDbDefaultCovariateData.Rd | 6 +++--- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/R/GetCovariatesFromOtherCohorts.R b/R/GetCovariatesFromOtherCohorts.R index 6b6b7952..f295d77f 100644 --- a/R/GetCovariatesFromOtherCohorts.R +++ b/R/GetCovariatesFromOtherCohorts.R @@ -29,6 +29,7 @@ #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. #' +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. #' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. @@ -48,6 +49,7 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index f2a46a2c..6243635e 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -29,7 +29,7 @@ #' be stored. If not provided, results will be fetched to R. The table can be #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. -#' +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. #' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. @@ -54,9 +54,7 @@ #' connection = connection, #' cdmDatabaseSchema = "main", #' cohortTable = "cohort", -#' covariateSettings = createDefaultCovariateSettings(), -#' targetDatabaseSchema = "main", -#' targetCovariateTable = "ut_cov" +#' covariateSettings = createDefaultCovariateSettings() #' ) #' } #' @export diff --git a/R/UnitTestHelperFunctions.R b/R/UnitTestHelperFunctions.R index 504a3f6b..d2bce2b1 100644 --- a/R/UnitTestHelperFunctions.R +++ b/R/UnitTestHelperFunctions.R @@ -59,6 +59,7 @@ #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. +#' @param ... Additional arguments, not used. #' @return #' Returns an object of type \code{covariateData}, containing information on the covariates. #' diff --git a/man/dot-getDbLooCovariateData.Rd b/man/dot-getDbLooCovariateData.Rd index 0c0f7db8..7524281f 100644 --- a/man/dot-getDbLooCovariateData.Rd +++ b/man/dot-getDbLooCovariateData.Rd @@ -55,6 +55,8 @@ cohort entry?} \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This will help reduce the file size of the characterization output, but will remove information on covariates that have very low values. The default is 0.} + +\item{...}{Additional arguments, not used.} } \value{ Returns an object of type \code{covariateData}, containing information on the covariates. diff --git a/man/getDbCohortBasedCovariatesData.Rd b/man/getDbCohortBasedCovariatesData.Rd index e7e5e668..510934bb 100644 --- a/man/getDbCohortBasedCovariatesData.Rd +++ b/man/getDbCohortBasedCovariatesData.Rd @@ -14,6 +14,7 @@ getDbCohortBasedCovariatesData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, @@ -59,11 +60,16 @@ is more than one period per person.} \code{\link{createCohortBasedCovariateSettings}} or \code{\link{createCohortBasedTemporalCovariateSettings}} functions.} +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.} + \item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If it is a temp table, do not specify \code{targetDatabaseSchema}.} +\item{targetCovariateContinuousTable}{(Optional) The name of the table where the resulting continuous covariates should be stored.} + \item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} \item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} @@ -80,9 +86,6 @@ on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} - -\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index 7a0557bf..99dda8c9 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -67,6 +67,8 @@ be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If it is a temp table, do not specify \code{targetDatabaseSchema}.} +\item{targetCovariateContinuousTable}{(Optional) The name of the table where the resulting continuous covariates should be stored.} + \item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} \item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} @@ -122,9 +124,7 @@ results <- getDbDefaultCovariateData( connection = connection, cdmDatabaseSchema = "main", cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - targetCovariateTable = "ut_cov" + covariateSettings = createDefaultCovariateSettings() ) } }