From e2ccfcb042098a0f553c35cc835319a709013f7b Mon Sep 17 00:00:00 2001 From: Jamie Gilbert Date: Wed, 12 Jan 2022 12:23:38 -0800 Subject: [PATCH 01/17] Made commented sqlite test for old code work --- R/GetDefaultCovariates.R | 181 +++++++++------------ tests/testthat/test-GetDefaultCovariates.R | 30 ++-- 2 files changed, 90 insertions(+), 121 deletions(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 12ca90af..6a871f58 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -54,10 +54,7 @@ getDbDefaultCovariateData <- function(connection, if (cdmVersion == "4") { stop("Common Data Model version 4 is not supported") } - if (!missing(targetCovariateTable) && !is.null(targetCovariateTable) && aggregated) { - stop("Writing aggregated results to database is currently not supported") - } - + settings <- .toJson(covariateSettings) rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortId)), cdmDatabaseSchema) @@ -74,123 +71,95 @@ getDbDefaultCovariateData <- function(connection, oracleTempSchema = oracleTempSchema) } } - + ParallelLogger::logInfo("Constructing features on server") - + sql <- SqlRender::translate(sql = todo$sqlConstruction, targetDialect = attr(connection, "dbms"), oracleTempSchema = oracleTempSchema) profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - - if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { - ParallelLogger::logInfo("Fetching data from server") - start <- Sys.time() - # Binary or non-aggregated features + + + if (missing(targetCovariateTable) | is.null(targetCovariateTable)) { covariateData <- Andromeda::andromeda() - if (!is.null(todo$sqlQueryFeatures)) { - sql <- 
SqlRender::translate(sql = todo$sqlQueryFeatures, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariates", - snakeCaseToCamelCase = TRUE) - } - - # Continuous aggregated features - if (!is.null(todo$sqlQueryContinuousFeatures)) { - sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariatesContinuous", - snakeCaseToCamelCase = TRUE) - } - - # Covariate reference - sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariateRef", - snakeCaseToCamelCase = TRUE) - - # Analysis reference - sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "analysisRef", - snakeCaseToCamelCase = TRUE) - - # Time reference - if (!is.null(todo$sqlQueryTimeRef)) { - sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "timeRef", + + queryFunction <- function(sql, tableName) { + DatabaseConnector::querySqlToAndromeda(connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = tableName, snakeCaseToCamelCase = 
TRUE) } - - - delta <- Sys.time() - start - ParallelLogger::logInfo("Fetching data took ", signif(delta, 3), " ", attr(delta, "units")) + } else { - # Don't fetch to R , but create on server instead - ParallelLogger::logInfo("Writing data to table") - start <- Sys.time() + convertQuery <- function(sql, databaseSchema, table) { - if (missing(databaseSchema) || is.null(databaseSchema)) { - tableName <- table - } else { - tableName <- paste(databaseSchema, table, sep = ".") - } - return(sub("FROM", paste("INTO", tableName, "FROM"), sql)) - } - - # Covariates - if (!is.null(todo$sqlQueryFeatures)) { - sql <- convertQuery(todo$sqlQueryFeatures, targetDatabaseSchema, targetCovariateTable) - sql <- SqlRender::translate(sql = sql, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) - } - - # Covariate reference - if (!missing(targetCovariateRefTable) && !is.null(targetCovariateRefTable)) { - sql <- convertQuery(todo$sqlQueryFeatureRef, targetDatabaseSchema, targetCovariateRefTable) - sql <- SqlRender::translate(sql = sql, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) + outerSql <- " + IF OBJECT_ID('@database_schema.@table', 'U') IS NOT NULL + DROP TABLE @database_schema.@table; + + CREATE TABLE @database_schema.@table AS + @sub_query;" + SqlRender::render(outerSql, + sub_query = gsub(";", "", sql), + database_schema = databaseSchema, + table = SqlRender::snakeCaseToCamelCase(table)) + } - - # Analysis reference - if (!missing(targetAnalysisRefTable) && !is.null(targetAnalysisRefTable)) { - sql <- convertQuery(todo$sqlQueryAnalysisRef, targetDatabaseSchema, targetAnalysisRefTable) + + queryFunction <- function(sql, tableName) { + sql <- convertQuery(todo$sqlQueryFeatures, targetDatabaseSchema, tableName) sql <- 
SqlRender::translate(sql = sql, targetDialect = attr(connection, "dbms"), oracleTempSchema = oracleTempSchema) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) + DatabaseConnector::renderTranslateExecuteSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } - delta <- Sys.time() - start - ParallelLogger::logInfo("Writing data took", signif(delta, 3), " ", attr(delta, "units")) - + + } + + ParallelLogger::logInfo("Fetching data from server") + start <- Sys.time() + # Binary or non-aggregated features + if (!is.null(todo$sqlQueryFeatures)) { + sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + queryFunction(sql, "covariates") + } + + # Continuous aggregated features + if (!is.null(todo$sqlQueryContinuousFeatures)) { + sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + queryFunction(sql, "covariatesContinuous") } + + # Covariate reference + sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + + queryFunction(sql, "covariateRef") + + # Analysis reference + sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + queryFunction(sql, "analysisRef") + + # Time reference + if (!is.null(todo$sqlQueryTimeRef)) { + sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + queryFunction(sql, "timeRef") + } + + delta <- Sys.time() - start + ParallelLogger::logInfo("Fetching data took ", signif(delta, 3), " ", attr(delta, "units")) + # Drop temp tables sql <- SqlRender::translate(sql = todo$sqlCleanup, targetDialect = attr(connection, "dbms"), @@ -206,8 +175,8 @@ 
getDbDefaultCovariateData <- function(connection, DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } } - - if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { + + if (missing(targetCovariateTable) | is.null(targetCovariateTable)) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index f8e299b0..814ccfc3 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -39,20 +39,20 @@ test_that("Test exit conditions", { }) # AGS - This test fails and is likely due to a bug when using SqlLite -# test_that("Test target table", { -# connection <- DatabaseConnector::connect(connectionDetails) -# Eunomia::createCohorts(connectionDetails) -# -# results <- getDbDefaultCovariateData(connection = connection, -# cdmDatabaseSchema = "main", -# cohortTable = "cohort", -# covariateSettings = createDefaultCovariateSettings(), -# targetDatabaseSchema = "main", -# targetCovariateTable = "ut_cov", -# targetCovariateRefTable = "ut_cov_ref", -# targetAnalysisRefTable = "ut_cov_analysis_ref") -# -# on.exit(DatabaseConnector::disconnect(connection)) -# }) +test_that("Test target table", { + connection <- DatabaseConnector::connect(connectionDetails) + Eunomia::createCohorts(connectionDetails) + + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + targetDatabaseSchema = "main", + targetCovariateTable = "ut_cov", + targetCovariateRefTable = "ut_cov_ref", + targetAnalysisRefTable = "ut_cov_analysis_ref") + + DatabaseConnector::disconnect(connection) +}) unlink(connectionDetails$server()) \ No newline at end of 
file From 235a74a426272e439be97ac46ad08b949bb3c8ae Mon Sep 17 00:00:00 2001 From: Jamie Gilbert Date: Wed, 12 Jan 2022 14:37:34 -0800 Subject: [PATCH 02/17] Changes to interface of getDefaultCovariates --- R/GetDefaultCovariates.R | 71 ++++++++++++++-------- man/getDbDefaultCovariateData.Rd | 20 +++--- tests/testthat/test-GetDefaultCovariates.R | 52 +++++++++------- 3 files changed, 82 insertions(+), 61 deletions(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 6a871f58..57ca78b6 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -24,14 +24,13 @@ #' @param covariateSettings Either an object of type \code{covariateSettings} as created using one #' of the createCovariate functions, or a list of such objects. #' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates -#' should be stored. -#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will -#' be stored. If not provided, results will be fetched to R. The table can be -#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If -#' it is a temp table, do not specify \code{targetDatabaseSchema}. -#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. -#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. -#' +#' should be stored. If not provided, results will be fetched to R. +#' @param targetTables (Optional) list of mappings for table names. +#' The names of the table where the resulting covariates will be if +#' \code{targetDatabaseSchema} is specified. The tables will be created in permanent +#' table in the \code{targetDatabaseSchema}. +#' @param dropExistingTables If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +#' into existing table data. 
#' @template GetCovarParams #' #' @export @@ -44,9 +43,8 @@ getDbDefaultCovariateData <- function(connection, rowIdField = "subject_id", covariateSettings, targetDatabaseSchema, - targetCovariateTable, - targetCovariateRefTable, - targetAnalysisRefTable, + targetTables = list(), + dropExistingTables = FALSE, aggregated = FALSE) { if (!is(covariateSettings, "covariateSettings")) { stop("Covariate settings object not of type covariateSettings") @@ -80,8 +78,8 @@ getDbDefaultCovariateData <- function(connection, profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - - if (missing(targetCovariateTable) | is.null(targetCovariateTable)) { + if (missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) { + # Save to Andromeda covariateData <- Andromeda::andromeda() queryFunction <- function(sql, tableName) { @@ -91,34 +89,53 @@ getDbDefaultCovariateData <- function(connection, andromedaTableName = tableName, snakeCaseToCamelCase = TRUE) } - + ParallelLogger::logInfo("Fetching data from server") } else { + # Save to DB + ParallelLogger::logInfo("Creating tables on server") + existingTables <- DatabaseConnector::getTableNames(connection, targetDatabaseSchema) + + convertQuery <- function(sql, targetDatabaseSchema, table) { + mappedTable <- targetTables[[table]] + if (is.null(mappedTable)) { + mappedTable <- SqlRender::camelCaseToSnakeCase(table) + } + tableExists <- mappedTable %in% existingTables + + if (!dropExistingTables & tableExists) { + ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) + } else { + ParallelLogger::logInfo("Creating table ", mappedTable, "for ", table) + } - convertQuery <- function(sql, databaseSchema, table) { outerSql <- " + {@create} ? 
{ IF OBJECT_ID('@database_schema.@table', 'U') IS NOT NULL DROP TABLE @database_schema.@table; - CREATE TABLE @database_schema.@table AS - @sub_query;" + SELECT * INTO @database_schema.@table FROM ( @sub_query ) sq; + } : { + INSERT INTO @database_schema.@table @sub_query; + } + " SqlRender::render(outerSql, sub_query = gsub(";", "", sql), - database_schema = databaseSchema, - table = SqlRender::snakeCaseToCamelCase(table)) - + database_schema = targetDatabaseSchema, + create = dropExistingTables | !tableExists, + table = mappedTable) } queryFunction <- function(sql, tableName) { - sql <- convertQuery(todo$sqlQueryFeatures, targetDatabaseSchema, tableName) - sql <- SqlRender::translate(sql = sql, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::renderTranslateExecuteSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) + sql <- convertQuery(sql, targetDatabaseSchema, tableName) + DatabaseConnector::renderTranslateExecuteSql(connection, + sql, + tempEmulationSchema = oracleTempSchema, + progressBar = FALSE, + reportOverallTime = FALSE) } } - ParallelLogger::logInfo("Fetching data from server") start <- Sys.time() # Binary or non-aggregated features if (!is.null(todo$sqlQueryFeatures)) { @@ -176,7 +193,7 @@ getDbDefaultCovariateData <- function(connection, } } - if (missing(targetCovariateTable) | is.null(targetCovariateTable)) { + if (missing(targetTables) | is.null(targetTables)) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index 1628d2ca..bf5c1b45 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -14,9 +14,8 @@ getDbDefaultCovariateData( rowIdField = "subject_id", covariateSettings, targetDatabaseSchema, - 
targetCovariateTable, - targetCovariateRefTable, - targetAnalysisRefTable, + targetTables = list(), + dropExistingTables = FALSE, aggregated = FALSE ) } @@ -51,16 +50,15 @@ is more than one period per person.} of the createCovariate functions, or a list of such objects.} \item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored.} +should be stored. If not provided, results will be fetched to R.} -\item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will -be stored. If not provided, results will be fetched to R. The table can be -a permanent table in the \code{targetDatabaseSchema} or a temp table. If -it is a temp table, do not specify \code{targetDatabaseSchema}.} +\item{targetTables}{(Optional) list of mappings for table names. +The names of the table where the resulting covariates will be if +\code{targetDatabaseSchema} is specified. The tables will be created in permanent +table in the \code{targetDatabaseSchema}.} -\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} - -\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} +\item{dropExistingTables}{If targetDatabaseSchema, drop any existing tables. 
Otherwise, results are merged +into existing table data.} \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry?} diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index 814ccfc3..bd3d9096 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -6,39 +6,27 @@ connectionDetails <- Eunomia::getEunomiaConnectionDetails() test_that("Test exit conditions", { connection <- DatabaseConnector::connect(connectionDetails) - + # covariateSettings object type expect_error(getDbDefaultCovariateData(connection = connection, cdmDatabaseSchema = "main", covariateSettings = list(), targetDatabaseSchema = "main", - targetCovariateTable = "cov", - targetCovariateRefTable = "cov_ref", - targetAnalysisRefTable = "cov_analysis_ref")) + targetTables = list(covariates = "cov", + covariateRef = "cov_ref", + analysisRef = "cov_analysis_ref"))) # CDM 4 not supported expect_error(getDbDefaultCovariateData(connection = connection, cdmDatabaseSchema = "main", cdmVersion = "4", covariateSettings = createDefaultCovariateSettings(), targetDatabaseSchema = "main", - targetCovariateTable = "cov", - targetCovariateRefTable = "cov_ref", - targetAnalysisRefTable = "cov_analysis_ref")) - - # targetCovariateTable and aggregated not supported - expect_error(getDbDefaultCovariateData(connection = connection, - cdmDatabaseSchema = "main", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - targetCovariateTable = "cov", - targetCovariateRefTable = "cov_ref", - targetAnalysisRefTable = "cov_analysis_ref", - aggregated = TRUE)) - - on.exit(DatabaseConnector::disconnect(connection)) + targetTables = list(covariates = "cov", + covariateRef = "cov_ref", + analysisRef = "cov_analysis_ref"))) + on.exit(DatabaseConnector::disconnect(connection)) }) -# AGS - This test fails and is likely due to a bug when using SqlLite test_that("Test 
target table", { connection <- DatabaseConnector::connect(connectionDetails) Eunomia::createCohorts(connectionDetails) @@ -48,9 +36,27 @@ test_that("Test target table", { cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), targetDatabaseSchema = "main", - targetCovariateTable = "ut_cov", - targetCovariateRefTable = "ut_cov_ref", - targetAnalysisRefTable = "ut_cov_analysis_ref") + targetTables = list(covariates = "ut_cov", + covariateRef = "ut_cov_ref", + analysisRef = "ut_cov_analysis_ref")) + + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_ref")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref")[1], 1) + + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + targetDatabaseSchema = "main", + aggregated = TRUE, + targetTables = list(covariates = "ut_cov_agg", + covariateRef = "ut_cov_ref_agg", + analysisRef = "ut_cov_analysis_ref_agg")) + + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) DatabaseConnector::disconnect(connection) }) From 4ecc262debf33daaa269ea2a4321b8c1bd29879d Mon Sep 17 00:00:00 2001 From: Jamie Gilbert Date: Thu, 13 Jan 2022 09:33:36 -0800 Subject: [PATCH 03/17] Fixes, restoration of old parameters and adaptation to use temp tables --- .Rbuildignore | 2 + DESCRIPTION | 2 +- R/GetDefaultCovariates.R | 81 ++++++++++++++-------- man/getDbDefaultCovariateData.Rd | 24 
++++++- tests/testthat/test-GetDefaultCovariates.R | 30 +++++++- 5 files changed, 105 insertions(+), 34 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 12007bba..2daec274 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ pom.xml extras docs diff --git a/DESCRIPTION b/DESCRIPTION index 576a8b36..348766b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,5 +42,5 @@ Suggests: Remotes: ohdsi/Eunomia NeedsCompilation: no -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Encoding: UTF-8 diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 57ca78b6..a6a63519 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -28,7 +28,18 @@ #' @param targetTables (Optional) list of mappings for table names. #' The names of the table where the resulting covariates will be if #' \code{targetDatabaseSchema} is specified. The tables will be created in permanent -#' table in the \code{targetDatabaseSchema}. +#' table in the \code{targetDatabaseSchema} or as temporary tables. Tables that can be +#' included in this list: covariates, covariateRef, analysisRef, covariatesContinuous, +#' timeRef +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' Superseded by \code{targetTables} +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. +#' Superseded by \code{targetTables} +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' Superseded by \code{targetTables} #' @param dropExistingTables If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged #' into existing table data. 
#' @template GetCovarParams @@ -42,8 +53,15 @@ getDbDefaultCovariateData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - targetDatabaseSchema, - targetTables = list(), + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTables = list( + covariates = targetCovariateTable, + covariateRef = targetCovariateRefTable, + analysisRef = targetAnalysisRefTable + ), dropExistingTables = FALSE, aggregated = FALSE) { if (!is(covariateSettings, "covariateSettings")) { @@ -77,8 +95,9 @@ getDbDefaultCovariateData <- function(connection, oracleTempSchema = oracleTempSchema) profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - - if (missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) { + # Is the target schema missing or are all the specified tables temp + allTempTables <- all(substr(targetTables,1,1) == "#") + if ((missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) & !allTempTables) { # Save to Andromeda covariateData <- Andromeda::andromeda() @@ -95,38 +114,46 @@ getDbDefaultCovariateData <- function(connection, ParallelLogger::logInfo("Creating tables on server") existingTables <- DatabaseConnector::getTableNames(connection, targetDatabaseSchema) - convertQuery <- function(sql, targetDatabaseSchema, table) { - mappedTable <- targetTables[[table]] - if (is.null(mappedTable)) { - mappedTable <- SqlRender::camelCaseToSnakeCase(table) - } - tableExists <- mappedTable %in% existingTables - - if (!dropExistingTables & tableExists) { - ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) - } else { - ParallelLogger::logInfo("Creating table ", mappedTable, "for ", table) - } - + convertQuery <- function(sql, table, tableExists) { outerSql <- " {@create} ? 
{ - IF OBJECT_ID('@database_schema.@table', 'U') IS NOT NULL - DROP TABLE @database_schema.@table; + IF OBJECT_ID('@table', 'U') IS NOT NULL + DROP TABLE @table; - SELECT * INTO @database_schema.@table FROM ( @sub_query ) sq; + SELECT * INTO @table FROM ( @sub_query ) sq; } : { - INSERT INTO @database_schema.@table @sub_query; + INSERT INTO @table @sub_query; } " SqlRender::render(outerSql, sub_query = gsub(";", "", sql), - database_schema = targetDatabaseSchema, create = dropExistingTables | !tableExists, - table = mappedTable) + table = table) } - queryFunction <- function(sql, tableName) { - sql <- convertQuery(sql, targetDatabaseSchema, tableName) + queryFunction <- function(sql, table) { + mappedTable <- targetTables[[table]] + if (is.null(mappedTable)) { + if (allTempTables) { + # Only bother storing specified temp tables + ParallelLogger::logInfo("Skipping", table, " other mapped tables are temp") + return(NULL) + } + mappedTable <- SqlRender::camelCaseToSnakeCase(table) + } + tableExists <- mappedTable %in% existingTables + + if (substr(mappedTable, 1, 1) != "#") { + mappedTable <- paste0(targetDatabaseSchema, ".", mappedTable) + } + + if (!dropExistingTables & tableExists) { + ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) + } else { + ParallelLogger::logInfo("Creating table ", mappedTable, " for ", table) + } + + sql <- convertQuery(sql, mappedTable, tableExists) DatabaseConnector::renderTranslateExecuteSql(connection, sql, tempEmulationSchema = oracleTempSchema, @@ -193,7 +220,7 @@ getDbDefaultCovariateData <- function(connection, } } - if (missing(targetTables) | is.null(targetTables)) { + if ((missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) & !allTempTables) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") diff --git a/man/getDbDefaultCovariateData.Rd 
b/man/getDbDefaultCovariateData.Rd index bf5c1b45..c1ad6a80 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -13,8 +13,12 @@ getDbDefaultCovariateData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - targetDatabaseSchema, - targetTables = list(), + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTables = list(covariates = targetCovariateTable, covariateRef = + targetCovariateRefTable, analysisRef = targetAnalysisRefTable), dropExistingTables = FALSE, aggregated = FALSE ) @@ -52,10 +56,24 @@ of the createCovariate functions, or a list of such objects.} \item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates should be stored. If not provided, results will be fetched to R.} +\item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will +be stored. If not provided, results will be fetched to R. The table can be +a permanent table in the \code{targetDatabaseSchema} or a temp table. If +it is a temp table, do not specify \code{targetDatabaseSchema}. +Superseded by \code{targetTables}} + +\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored. +Superseded by \code{targetTables}} + +\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored. +Superseded by \code{targetTables}} + \item{targetTables}{(Optional) list of mappings for table names. The names of the table where the resulting covariates will be if \code{targetDatabaseSchema} is specified. The tables will be created in permanent -table in the \code{targetDatabaseSchema}.} +table in the \code{targetDatabaseSchema} or as temporary tables. 
Tables that can be +included in this list: covariates, covariateRef, analysisRef, covariatesContinuous, +timeRef} \item{dropExistingTables}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged into existing table data.} diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index bd3d9096..61eca2d3 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -6,7 +6,7 @@ connectionDetails <- Eunomia::getEunomiaConnectionDetails() test_that("Test exit conditions", { connection <- DatabaseConnector::connect(connectionDetails) - + on.exit(DatabaseConnector::disconnect(connection)) # covariateSettings object type expect_error(getDbDefaultCovariateData(connection = connection, cdmDatabaseSchema = "main", @@ -24,11 +24,11 @@ test_that("Test exit conditions", { targetTables = list(covariates = "cov", covariateRef = "cov_ref", analysisRef = "cov_analysis_ref"))) - on.exit(DatabaseConnector::disconnect(connection)) }) test_that("Test target table", { connection <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) Eunomia::createCohorts(connectionDetails) results <- getDbDefaultCovariateData(connection = connection, @@ -58,7 +58,31 @@ test_that("Test target table", { expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) - DatabaseConnector::disconnect(connection) + # Temp tables with old prototype + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + aggregated = TRUE, + targetCovariateTable = "#ut_cov_agg", + targetAnalysisRefTable = "#ut_cov_ref_agg", + targetCovariateRefTable = 
"#ut_cov_anal_ref_agg") + + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_anal_ref_agg")[1], 1) + + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + targetCovariateTable = "#ut_cov", + targetAnalysisRefTable = "#ut_cov_ref", + targetCovariateRefTable = "#ut_cov_analysis_ref") + + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], 1) }) unlink(connectionDetails$server()) \ No newline at end of file From e29ff8d7576fe3b549be5b1e82197c8170d18083 Mon Sep 17 00:00:00 2001 From: Jamie Gilbert Date: Thu, 13 Jan 2022 14:05:56 -0800 Subject: [PATCH 04/17] Creating/dropping or appending results checked. 
Added details to news --- NEWS.md | 12 +++++++ R/GetDefaultCovariates.R | 35 ++++++++++--------- man/getDbDefaultCovariateData.Rd | 9 +++-- tests/testthat/test-GetDefaultCovariates.R | 39 ++++++++++++++++++++-- 4 files changed, 74 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0fd1669a..bd008137 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +FeatureExtraction 3.2.1 +======================= + +New Features: + +- Added ability to store aggregate results from `getDbDefaultCovariateData` in the database and added +ability to control all target tables with new `targetTables` list parameter + +Bugfixes: + +- Fixed tests and made sure storage of covariates with `getDbDefaultCovariateData` works and is consistent + FeatureExtraction 3.2.0 ======================= diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index a6a63519..53f3283a 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -40,8 +40,9 @@ #' Superseded by \code{targetTables} #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. #' Superseded by \code{targetTables} -#' @param dropExistingTables If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged -#' into existing table data. +#' @param dropTableIfExists If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +#' into existing table data. Overides createTable. +#' @param createTable Run sql to create table? Code does not check if table exists. 
#' @template GetCovarParams #' #' @export @@ -62,7 +63,8 @@ getDbDefaultCovariateData <- function(connection, covariateRef = targetCovariateRefTable, analysisRef = targetAnalysisRefTable ), - dropExistingTables = FALSE, + dropTableIfExists = FALSE, + createTable = TRUE, aggregated = FALSE) { if (!is(covariateSettings, "covariateSettings")) { stop("Covariate settings object not of type covariateSettings") @@ -110,16 +112,19 @@ getDbDefaultCovariateData <- function(connection, } ParallelLogger::logInfo("Fetching data from server") } else { + + if (dropTableIfExists) { + createTable <- TRUE + } # Save to DB ParallelLogger::logInfo("Creating tables on server") - existingTables <- DatabaseConnector::getTableNames(connection, targetDatabaseSchema) - - convertQuery <- function(sql, table, tableExists) { + convertQuery <- function(sql, table) { outerSql <- " - {@create} ? { - IF OBJECT_ID('@table', 'U') IS NOT NULL + {@drop} ? { + IF OBJECT_ID('@table', 'U') IS NOT NULL DROP TABLE @table; - + } + {@create} ? 
{ SELECT * INTO @table FROM ( @sub_query ) sq; } : { INSERT INTO @table @sub_query; @@ -127,7 +132,8 @@ getDbDefaultCovariateData <- function(connection, " SqlRender::render(outerSql, sub_query = gsub(";", "", sql), - create = dropExistingTables | !tableExists, + create = createTable, + drop = dropTableIfExists, table = table) } @@ -141,19 +147,18 @@ getDbDefaultCovariateData <- function(connection, } mappedTable <- SqlRender::camelCaseToSnakeCase(table) } - tableExists <- mappedTable %in% existingTables if (substr(mappedTable, 1, 1) != "#") { mappedTable <- paste0(targetDatabaseSchema, ".", mappedTable) } - if (!dropExistingTables & tableExists) { - ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) - } else { + if (createTable) { ParallelLogger::logInfo("Creating table ", mappedTable, " for ", table) + } else { + ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) } - sql <- convertQuery(sql, mappedTable, tableExists) + sql <- convertQuery(sql, mappedTable) DatabaseConnector::renderTranslateExecuteSql(connection, sql, tempEmulationSchema = oracleTempSchema, diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index c1ad6a80..a69f99a8 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -19,7 +19,8 @@ getDbDefaultCovariateData( targetAnalysisRefTable = NULL, targetTables = list(covariates = targetCovariateTable, covariateRef = targetCovariateRefTable, analysisRef = targetAnalysisRefTable), - dropExistingTables = FALSE, + dropTableIfExists = FALSE, + createTable = TRUE, aggregated = FALSE ) } @@ -75,8 +76,10 @@ table in the \code{targetDatabaseSchema} or as temporary tables. Tables that can included in this list: covariates, covariateRef, analysisRef, covariatesContinuous, timeRef} -\item{dropExistingTables}{If targetDatabaseSchema, drop any existing tables. 
Otherwise, results are merged -into existing table data.} +\item{dropTableIfExists}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +into existing table data. Overides createTable.} + +\item{createTable}{Run sql to create table? Code does not check if table exists.} \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry?} diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index 61eca2d3..45508430 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -80,9 +80,42 @@ test_that("Test target table", { targetAnalysisRefTable = "#ut_cov_ref", targetCovariateRefTable = "#ut_cov_analysis_ref") - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], 1) + covCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1] + expect_gt(covCt, 1) + covRefCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1] + expect_gt(covRefCt, 1) + anlRefCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1] + expect_gt(anlRefCt, 1) + + # append results rather than deleting the tables + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + createTable = FALSE, + dropTableIfExists = FALSE, + targetCovariateTable = "#ut_cov", + targetAnalysisRefTable = "#ut_cov_ref", + targetCovariateRefTable = "#ut_cov_analysis_ref") + + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT 
COUNT(*) FROM #ut_cov")[1], covCt * 2) + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt * 2) + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt * 2) + + # Recreate tables (and check create override works) + results <- getDbDefaultCovariateData(connection = connection, + cdmDatabaseSchema = "main", + cohortTable = "cohort", + covariateSettings = createDefaultCovariateSettings(), + createTable = FALSE, + dropTableIfExists = TRUE, + targetCovariateTable = "#ut_cov", + targetAnalysisRefTable = "#ut_cov_ref", + targetCovariateRefTable = "#ut_cov_analysis_ref") + + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt) + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt) + expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt) }) unlink(connectionDetails$server()) \ No newline at end of file From 40cae543b94320e8a18c5f66972cb0f9d8f7e6dc Mon Sep 17 00:00:00 2001 From: Jamie Gilbert Date: Fri, 14 Jan 2022 08:25:49 -0800 Subject: [PATCH 05/17] whitespace --- R/GetDefaultCovariates.R | 2 +- vignettes/UsingFeatureExtractionKorean.Rmd | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 53f3283a..170aaecd 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -155,7 +155,7 @@ getDbDefaultCovariateData <- function(connection, if (createTable) { ParallelLogger::logInfo("Creating table ", mappedTable, " for ", table) } else { - ParallelLogger::logInfo("Appending", table, " results to table ", mappedTable) + ParallelLogger::logInfo("Appending ", table, " results to table ", mappedTable) } sql <- convertQuery(sql, mappedTable) diff --git 
a/vignettes/UsingFeatureExtractionKorean.Rmd b/vignettes/UsingFeatureExtractionKorean.Rmd index 5bd5a3b4..3dfaa4ef 100644 --- a/vignettes/UsingFeatureExtractionKorean.Rmd +++ b/vignettes/UsingFeatureExtractionKorean.Rmd @@ -409,8 +409,7 @@ aggregated = TRUE로 지정했다. 또한 더 이상 personId 데이터를 받 ### 집계 공변량 출력 형식 집합 된 covariateData 객체의 두 가지 주요 구성 요소는 각각 이진 및 연속 공변량에 대한 공변량 및 공변 연속이다. - - ```{r eval=FALSE} +```{r eval=FALSE} covariateData2$covariates ``` ```{r echo=FALSE,message=FALSE} From e118c5dced7bfcc137bfc44b6a037b6166f324fe Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Thu, 29 Jan 2026 14:39:10 -0500 Subject: [PATCH 06/17] Fix merge problems --- DESCRIPTION | 2 +- R/GetDefaultCovariates.R | 3 +- man/getDbDefaultCovariateData.Rd | 9 ++-- tests/testthat/setup.R | 1 + tests/testthat/test-GetDefaultCovariates.R | 57 ++++++++++------------ 5 files changed, 35 insertions(+), 37 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4c1e6a34..8092ca01 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,6 @@ VignetteBuilder: knitr URL: https://github.com/OHDSI/FeatureExtraction BugReports: https://github.com/OHDSI/FeatureExtraction/issues NeedsCompilation: no -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Encoding: UTF-8 Language: en-US diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index b8833c00..be70da92 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -280,8 +280,7 @@ getDbDefaultCovariateData <- function(connection, } } - if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { - if ((missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) & !allTempTables) { + if ((missing(targetDatabaseSchema) || is.null(targetDatabaseSchema)) & !allTempTables) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") diff --git a/man/getDbDefaultCovariateData.Rd 
b/man/getDbDefaultCovariateData.Rd index df01e238..65187c73 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -73,8 +73,7 @@ Superseded by \code{targetTables}} \item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored. Superseded by \code{targetTables}} -\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored. -Superseded by \code{targetTables}} +\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} \item{targetTables}{(Optional) list of mappings for table names. The names of the table where the resulting covariates will be if @@ -92,8 +91,10 @@ into existing table data. Overides createTable.} cohort entry?} \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This -will help reduce the file size of the characterization output, but will remove information -on covariates that have very low values. The default is 0.} + will help reduce the file size of the characterization output, but will remove information + on covariates that have very low values. The default is 0. + + Superseded by \code{targetTables}} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. 
To emulate temp tables, provide a schema with write diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dd59bcfa..03bc7ece 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -217,6 +217,7 @@ if (dbms == "sql server") { # eunomia if (dbms == "sqlite") { if (!is.null(checkRemoteFileAvailable("https://raw.githubusercontent.com/OHDSI/EunomiaDatasets/main/datasets/GiBleed/GiBleed_5.3.zip"))) { + print('got here') eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails(databaseFile = "testEunomia.sqlite") eunomiaCdmDatabaseSchema <- "main" eunomiaOhdsiDatabaseSchema <- "main" diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index 1a01233c..8c674acd 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -43,11 +43,10 @@ test_that("Test exit conditions", { }) test_that("Test target table", { - connection <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection)) - Eunomia::createCohorts(connectionDetails) - - results <- getDbDefaultCovariateData(connection = connection, + skip_on_cran() + skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) + + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -56,11 +55,11 @@ test_that("Test target table", { covariateRef = "ut_cov_ref", analysisRef = "ut_cov_analysis_ref")) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_ref")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM 
main.ut_cov")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref")[1], 1) - results <- getDbDefaultCovariateData(connection = connection, + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -70,12 +69,12 @@ test_that("Test target table", { covariateRef = "ut_cov_ref_agg", analysisRef = "ut_cov_analysis_ref_agg")) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) # Temp tables with old prototype - results <- getDbDefaultCovariateData(connection = connection, + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -84,11 +83,11 @@ test_that("Test target table", { targetAnalysisRefTable = "#ut_cov_ref_agg", targetCovariateRefTable = "#ut_cov_anal_ref_agg") - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM 
#ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_anal_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref_agg")[1], 1) + expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_anal_ref_agg")[1], 1) - results <- getDbDefaultCovariateData(connection = connection, + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -96,15 +95,15 @@ test_that("Test target table", { targetAnalysisRefTable = "#ut_cov_ref", targetCovariateRefTable = "#ut_cov_analysis_ref") - covCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1] + covCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1] expect_gt(covCt, 1) - covRefCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1] + covRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1] expect_gt(covRefCt, 1) - anlRefCt <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1] + anlRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1] expect_gt(anlRefCt, 1) # append results rather than deleting the tables - results <- getDbDefaultCovariateData(connection = connection, + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -114,12 +113,12 @@ test_that("Test target table", { targetAnalysisRefTable 
= "#ut_cov_ref", targetCovariateRefTable = "#ut_cov_analysis_ref") - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt * 2) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt * 2) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt * 2) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt * 2) # Recreate tables (and check create override works) - results <- getDbDefaultCovariateData(connection = connection, + results <- getDbDefaultCovariateData(connection = eunomiaConnection, cdmDatabaseSchema = "main", cohortTable = "cohort", covariateSettings = createDefaultCovariateSettings(), @@ -129,9 +128,7 @@ test_that("Test target table", { targetAnalysisRefTable = "#ut_cov_ref", targetCovariateRefTable = "#ut_cov_analysis_ref") - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(connection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt) + expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM 
#ut_cov_analysis_ref")[1], anlRefCt) }) - -unlink(connectionDetails$server()) \ No newline at end of file From 2f29e94e566df5b27adabfe74b6a1a49311ff0cd Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Thu, 29 Jan 2026 15:56:41 -0500 Subject: [PATCH 07/17] Remove debugging message --- tests/testthat/setup.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 03bc7ece..dd59bcfa 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -217,7 +217,6 @@ if (dbms == "sql server") { # eunomia if (dbms == "sqlite") { if (!is.null(checkRemoteFileAvailable("https://raw.githubusercontent.com/OHDSI/EunomiaDatasets/main/datasets/GiBleed/GiBleed_5.3.zip"))) { - print('got here') eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails(databaseFile = "testEunomia.sqlite") eunomiaCdmDatabaseSchema <- "main" eunomiaOhdsiDatabaseSchema <- "main" From 56c8d0630db2511175d57735894599891c321bc7 Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Thu, 29 Jan 2026 16:53:19 -0500 Subject: [PATCH 08/17] Restore warning for post-coordinated concept collision --- R/GetDefaultCovariates.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index be70da92..cd1554aa 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -152,13 +152,24 @@ getDbDefaultCovariateData <- function(connection, # Save to Andromeda covariateData <- Andromeda::andromeda() - queryFunction <- function(sql, tableName) { DatabaseConnector::querySqlToAndromeda(connection = connection, sql = sql, andromeda = covariateData, andromedaTableName = tableName, snakeCaseToCamelCase = TRUE) + + if (tableName == "covariateRef") { + collisions <- covariateData$covariateRef %>% + dplyr::filter(collisions > 0) %>% + dplyr::collect() + if (nrow(collisions) > 0) { + warning(sprintf( + "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s", + 
paste(collisions$covariateId, paste = ", ") + )) + } + } } ParallelLogger::logInfo("Fetching data from server") @@ -167,6 +178,7 @@ getDbDefaultCovariateData <- function(connection, if (dropTableIfExists) { createTable <- TRUE } + # Save to DB ParallelLogger::logInfo("Creating tables on server") convertQuery <- function(sql, table) { From b0d523e6feb5057ba88d218bfecd2243f00f35da Mon Sep 17 00:00:00 2001 From: jreps Date: Tue, 17 Feb 2026 17:30:32 -0500 Subject: [PATCH 09/17] fixing export FE to database - letting users export to a set of database tables instead of downloading to Andromeda --- R/GetCovariates.R | 187 ++++++++- R/GetCovariatesFromOtherCohorts.R | 23 ++ R/GetDefaultCovariates.R | 436 +++++++++++++++------ inst/sql/sql_server/CreateExportTables.sql | 59 +++ inst/sql/sql_server/DropExportTables.sql | 6 + man/getDbCohortBasedCovariatesData.Rd | 19 + man/getDbCovariateData.Rd | 38 ++ man/getDbDefaultCovariateData.Rd | 30 +- 8 files changed, 661 insertions(+), 137 deletions(-) create mode 100644 inst/sql/sql_server/CreateExportTables.sql create mode 100644 inst/sql/sql_server/DropExportTables.sql diff --git a/R/GetCovariates.R b/R/GetCovariates.R index cf2793c5..e055f36a 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -71,6 +71,29 @@ #' privileges where temp tables can be created. #' @param covariateCohortDatabaseSchema The database schema where the cohorts used to define the covariates can be found. #' @param covariateCohortTable The table where the cohorts used to define the covariates can be found. +#' +#' @param exportToTable Whether to export to a table rather than Andromeda object +#' @param dropTableIfExists If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +#' into existing table data. Overides createTable. +#' @param createTable Run sql to create table? Code does not check if table exists. 
+#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates +#' should be stored as a table. If not provided, results will be fetched to R. +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' #' #' @return #' Returns an object of type \code{covariateData}, containing information on the covariates. 
@@ -113,6 +136,17 @@ getDbCovariateData <- function(connectionDetails = NULL, cohortIds = c(-1), rowIdField = "subject_id", covariateSettings, + + exportToTable = FALSE, + createTable = exportToTable, + dropTableIfExists = exportToTable, + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, + aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), @@ -155,6 +189,19 @@ getDbCovariateData <- function(connectionDetails = NULL, } else { cohortDatabaseSchemaTable <- paste(cohortDatabaseSchema, cohortTable, sep = ".") } + + # check for temporal features in any of the settings + if(inherits(covariateSettings, 'covariateSettings')){ + anyTemporal <- covariateSettings$temporal | covariateSettings$temporalSequence + } else{ + anyTemporal <- sum(unlist(lapply( + X = covariateSettings, + FUN = function(x){ + sum(c(x$temporal,x$temporalSequence)) == 1 + }))) > 0 + } + + sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? 
{WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;" sql <- SqlRender::render( sql = sql, @@ -192,6 +239,115 @@ getDbCovariateData <- function(connectionDetails = NULL, covariateCohortTable ) } + + # figure out tables + if (exportToTable) { + if(is.null(targetDatabaseSchema)){ + # turn off create table since the tables are temp + if(createTable){ + warning('Turning off createTable since no targetDatabaseSchema so output tables are temp') + } + createTable <- FALSE + tempOutputTables <- TRUE + # covariate tables + if (substr(targetCovariateTable, 1, 1) == "#") { + targetCovariateTable <- targetCovariateTable + } else { + targetCovariateTable <- paste0("#", targetCovariateTable) + } + # cov cont table + if (substr(targetCovariateContinuousTable, 1, 1) == "#") { + targetCovariateContinuousTable <- targetCovariateContinuousTable + } else { + targetCovariateContinuousTable <- paste0("#", targetCovariateContinuousTable) + } + # cov ref table + if (substr(targetCovariateRefTable, 1, 1) == "#") { + targetCovariateRefTable <- targetCovariateRefTable + } else { + targetCovariateRefTable <- paste0("#", targetCovariateRefTable) + } + # analysis ref table + if (substr(targetAnalysisRefTable, 1, 1) == "#") { + targetAnalysisRefTable <- targetAnalysisRefTable + } else { + targetAnalysisRefTable <- paste0("#", targetAnalysisRefTable) + } + # time ref table + if (substr(targetTimeRefTable, 1, 1) == "#") { + targetTimeRefTable <- targetTimeRefTable + } else { + targetTimeRefTable <- paste0("#", targetTimeRefTable) + } + + } else { + tempOutputTables <- FALSE + targetCovariateTable <- paste(targetDatabaseSchema, targetCovariateTable, sep = ".") + targetCovariateContinuousTable <- paste(targetDatabaseSchema, targetCovariateContinuousTable, sep = ".") + targetCovariateRefTable <- paste(targetDatabaseSchema, targetCovariateRefTable, sep = ".") + targetAnalysisRefTable <- paste(targetDatabaseSchema, targetAnalysisRefTable, sep = ".") + targetTimeRefTable <- 
paste(targetDatabaseSchema, targetTimeRefTable, sep = ".") + } + } + + + # drop table if required + if(dropTableIfExists & exportToTable){ + message('Dropping export tables') + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = 'DropExportTables.sql', + packageName = 'FeatureExtraction', + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + temp_table = tempOutputTables, + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + if(dropTableIfExists & !createTable & exportToTable){ + if(!tempOutputTables){ + stop('Seem to be exporting to non-temp tables but create table is FALSE') + } + } + + # create the cohort tables if required + if(createTable & exportToTable ){ + if(!tempOutputTables){ + message('Creating export tables') + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = 'CreateExportTables.sql', + packageName = 'FeatureExtraction', + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + + aggregated = aggregated, + temporal = anyTemporal, + row_id_field = 'row_id', + + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } + + for (i in 1:length(covariateSettings)) { fun <- attr(covariateSettings[[i]], "fun") args <- list( @@ -203,6 +359,15 @@ getDbCovariateData <- function(connectionDetails = NULL, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = covariateSettings[[i]], + + targetCovariateTable = targetCovariateTable, + 
targetCovariateContinuousTable = targetCovariateContinuousTable, + targetCovariateRefTable = targetCovariateRefTable, + targetAnalysisRefTable = targetAnalysisRefTable, + targetTimeRefTable = targetTimeRefTable, + dropTableIfExists = FALSE, # can remove this input + createTable = FALSE, # can remove this input + aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) @@ -224,9 +389,15 @@ getDbCovariateData <- function(connectionDetails = NULL, } else if (hasData(tempCovariateData$covariatesContinuous)) { covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous } - - Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef) - Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) + + if(hasData(tempCovariateData$covariateRef)){ + Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef) + } + if(hasData(tempCovariateData$analysisRef)){ + Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) + } + + if(is.null(targetDatabaseSchema)){ for (name in names(attr(tempCovariateData, "metaData"))) { if (is.null(attr(covariateData, "metaData")[[name]])) { attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]] @@ -239,11 +410,15 @@ getDbCovariateData <- function(connectionDetails = NULL, ) } } + } # if NULL target schema } } } - attr(covariateData, "metaData")$populationSize <- populationSize - attr(covariateData, "metaData")$cohortIds <- cohortIds + + if(!is.null(covariateData)){ + attr(covariateData, "metaData")$populationSize <- populationSize + attr(covariateData, "metaData")$cohortIds <- cohortIds + } } - return(covariateData) + return(invisible(covariateData)) } diff --git a/R/GetCovariatesFromOtherCohorts.R b/R/GetCovariatesFromOtherCohorts.R index 947efc9f..6b6b7952 100644 --- a/R/GetCovariatesFromOtherCohorts.R +++ b/R/GetCovariatesFromOtherCohorts.R @@ -22,6 +22,17 @@ #' @param 
covariateSettings An object of type \code{covariateSettings} as created using the #' \code{\link{createCohortBasedCovariateSettings}} or #' \code{\link{createCohortBasedTemporalCovariateSettings}} functions. +#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates +#' should be stored. If not provided, results will be fetched to R. +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. +#' +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. 
@@ -37,6 +48,11 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { @@ -153,6 +169,13 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = detailledSettings, + + targetCovariateTable = targetCovariateTable, + targetCovariateContinuousTable = targetCovariateContinuousTable, + targetCovariateRefTable = targetCovariateRefTable, + targetAnalysisRefTable = targetAnalysisRefTable, + targetTimeRefTable = targetTimeRefTable, + aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index be70da92..0a701d51 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -25,25 +25,20 @@ #' of the createCovariate functions, or a list of such objects. #' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates #' should be stored. If not provided, results will be fetched to R. -#' @param targetTables (Optional) list of mappings for table names. -#' The names of the table where the resulting covariates will be if -#' \code{targetDatabaseSchema} is specified. The tables will be created in permanent -#' table in the \code{targetDatabaseSchema} or as temporary tables. Tables that can be -#' included in this list: covariates, covariateRef, analysisRef, covariatesContinuous, -#' timeRef #' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will #' be stored. If not provided, results will be fetched to R. 
The table can be #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. -#' Superseded by \code{targetTables} +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates will be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. -#' Superseded by \code{targetTables} +#' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. #' -#' Superseded by \code{targetTables} +#' #' @param dropTableIfExists If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged #' into existing table data. Overides createTable. #' @param createTable Run sql to create table? Code does not check if table exists. 
@@ -79,15 +74,13 @@ getDbDefaultCovariateData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, - targetTables = list( - covariates = targetCovariateTable, - covariateRef = targetCovariateRefTable, - analysisRef = targetAnalysisRefTable - ), + targetTimeRefTable = NULL, + dropTableIfExists = FALSE, createTable = TRUE, aggregated = FALSE, @@ -115,6 +108,19 @@ getDbDefaultCovariateData <- function(connection, minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE) checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, upper = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) + + + targetTables <- list( + covariates = targetCovariateTable, + covariatesContinuous = targetCovariateContinuousTable, + covariateRef = targetCovariateRefTable, + analysisRef = targetAnalysisRefTable, + timeRef = targetTimeRefTable + ) + # Is the target schema missing or are all the specified tables temp + allTempTables <- all(substr(targetTables,1,1) == "#") + extractToAndromeda <- is.null(targetCovariateTable) + settings <- .toJson(covariateSettings) rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) @@ -145,116 +151,322 @@ getDbDefaultCovariateData <- function(connection, ) profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - - # Is the target schema missing or are all the specified tables temp - allTempTables <- all(substr(targetTables,1,1) == "#") - if ((missing(targetDatabaseSchema) | is.null(targetDatabaseSchema)) & !allTempTables) { - # Save to Andromeda - covariateData <- Andromeda::andromeda() - - - queryFunction <- function(sql, tableName) { - 
DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = tableName, - snakeCaseToCamelCase = TRUE) - } - - ParallelLogger::logInfo("Fetching data from server") - } else { - - if (dropTableIfExists) { - createTable <- TRUE - } - # Save to DB - ParallelLogger::logInfo("Creating tables on server") - convertQuery <- function(sql, table) { - outerSql <- " - {@drop} ? { - IF OBJECT_ID('@table', 'U') IS NOT NULL - DROP TABLE @table; - } - {@create} ? { - SELECT * INTO @table FROM ( @sub_query ) sq; - } : { - INSERT INTO @table @sub_query; - } - " - SqlRender::render(outerSql, - sub_query = gsub(";", "", sql), - create = createTable, - drop = dropTableIfExists, - table = table) - } - - queryFunction <- function(sql, table) { - mappedTable <- targetTables[[table]] - if (is.null(mappedTable)) { - if (allTempTables) { - # Only bother storing specified temp tables - ParallelLogger::logInfo("Skipping", table, " other mapped tables are temp") - return(NULL) - } - mappedTable <- SqlRender::camelCaseToSnakeCase(table) - } - - if (substr(mappedTable, 1, 1) != "#") { - mappedTable <- paste0(targetDatabaseSchema, ".", mappedTable) - } - - if (createTable) { - ParallelLogger::logInfo("Creating table ", mappedTable, " for ", table) - } else { - ParallelLogger::logInfo("Appending ", table, " results to table ", mappedTable) - } - - sql <- convertQuery(sql, mappedTable) - DatabaseConnector::renderTranslateExecuteSql(connection, - sql, - tempEmulationSchema = oracleTempSchema, - progressBar = FALSE, - reportOverallTime = FALSE) - } - - } - + + + + # Now we extract the results into Andromeda tables or as tables + + ParallelLogger::logInfo("Fetching data from server") start <- Sys.time() + covariateData <- Andromeda::andromeda() + # Binary or non-aggregated features if (!is.null(todo$sqlQueryFeatures)) { - sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, - targetDialect = attr(connection, "dbms"), - 
oracleTempSchema = oracleTempSchema) - queryFunction(sql, "covariates") + + # etracting covariate table + if(extractToAndromeda){ + sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "covariates", + snakeCaseToCamelCase = TRUE + ) + } else{ + + # for testing to see column order + #print(todo$sqlQueryFeatures) + + sql <- " + {@temp_tables}?{ + + SELECT * INTO @target_covariate_table + FROM (@sub_query) main_table; + + }:{ + + INSERT INTO @target_covariate_table( + + {@temporal | @temporal_sequence} ? {time_id,} + + {@aggregated}?{ + cohort_definition_id, + covariate_id, + sum_value, + average_value + }:{ + covariate_id, + row_id, + covariate_value + } + + ) @sub_query; + } " + + sql <- SqlRender::render( + sql = sql, + temp_tables = allTempTables, + target_covariate_table = targetTables$covariates, + sub_query = gsub(";", "", todo$sqlQueryFeatures), + temporal = covariateSettings$temporal, + temporal_sequence = covariateSettings$temporalSequence, + aggregated = aggregated + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + + } + } # Continuous aggregated features if (!is.null(todo$sqlQueryContinuousFeatures)) { - sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - queryFunction(sql, "covariatesContinuous") + + if(extractToAndromeda){ + sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + 
andromeda = covariateData, + andromedaTableName = "covariatesContinuous", + snakeCaseToCamelCase = TRUE + ) + } else{ + sql <- " + + {@temp_tables}?{ + + SELECT * INTO @target_covariate_continuous_table + FROM (@sub_query) main_table; + + }:{ + + INSERT INTO @target_covariate_continuous_table( + {@aggregated}?{ + + cohort_definition_id, + covariate_id, + {@temporal | @temporal_sequence} ? {time_id,} + count_value, + min_value, + max_value, + average_value, + standard_deviation, + median_value, + p10_value, + p25_value, + p75_value, + p90_value + + }:{ + + covariate_id, + {@temporal | @temporal_sequence} ? {time_id,} + row_id, + covariate_value + + } + + ) @sub_query; + }" + + sql <- SqlRender::render( + sql = sql, + temp_tables = allTempTables, + target_covariate_continuous_table = targetTables$covariatesContinuous, + sub_query = gsub(";", "", todo$sqlQueryContinuousFeatures), + temporal = covariateSettings$temporal, + temporal_sequence = covariateSettings$temporalSequence, + aggregated = aggregated + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } # Covariate reference - sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - - queryFunction(sql, "covariateRef") + if (!is.null(todo$sqlQueryFeatureRef)) { + + if(extractToAndromeda){ + sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "covariateRef", + snakeCaseToCamelCase = TRUE + ) + } else{ + sql <- " + {@temp_tables}?{ + + SELECT * INTO @target_covariate_ref_table + FROM (@sub_query) main_table; + + }:{ + + INSERT INTO 
@target_covariate_ref_table( + covariate_id, + covariate_name, + analysis_id, + concept_id, + value_as_concept_id, + collisions + ) @sub_query ; + }" + + sql <- SqlRender::render( + sql = sql, + temp_tables = allTempTables, + target_covariate_ref_table = targetTables$covariateRef, + sub_query = gsub(";", "", todo$sqlQueryFeatureRef), + temporal = covariateSettings$temporal, + temporal_sequence = covariateSettings$temporalSequence + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + } + # Analysis reference - sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - queryFunction(sql, "analysisRef") + if (!is.null(todo$sqlQueryAnalysisRef)) { + + if(extractToAndromeda){ + sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "analysisRef", + snakeCaseToCamelCase = TRUE + ) + } else{ + sql <- " + {@temp_tables}?{ + + SELECT * INTO @target_analysis_ref_table + FROM (@sub_query) main_table; + + }:{ + + INSERT INTO @target_analysis_ref_table( + analysis_id, + analysis_name, + domain_id, + {!@temporal} ? 
{ + start_day, + end_day, + } + is_binary, + missing_means_zero + ) @sub_query ; + }" + + sql <- SqlRender::render( + sql = sql, + temp_tables = allTempTables, + target_analysis_ref_table = targetTables$analysisRef, + sub_query = gsub(";", "", todo$sqlQueryAnalysisRef), + temporal = covariateSettings$temporal | covariateSettings$temporalSequence + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + } + # Time reference if (!is.null(todo$sqlQueryTimeRef)) { - sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - queryFunction(sql, "timeRef") + + if(extractToAndromeda){ + sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, + targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "timeRef", + snakeCaseToCamelCase = TRUE + ) + } else{ + # TODO - what columns are in time ref table?! 
+ sql <- " + {@temp_tables}?{ + + SELECT * INTO @target_time_ref_table + FROM (@sub_query) main_table; + + }:{ + INSERT INTO @target_time_ref_table( + time_id + ) @sub_query; + } " + # render argument names must match the @placeholders used in the template above + sql <- SqlRender::render( + sql = sql, + temp_tables = allTempTables, + target_time_ref_table = targetTables$timeRef, + sub_query = gsub(";", "", todo$sqlQueryTimeRef), + temporal = covariateSettings$temporal | covariateSettings$temporalSequence + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } delta <- Sys.time() - start @@ -280,7 +492,7 @@ getDbDefaultCovariateData <- function(connection, } } - if ((missing(targetDatabaseSchema) || is.null(targetDatabaseSchema)) & !allTempTables) { + if (extractToAndromeda) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") @@ -293,5 +505,7 @@ getDbDefaultCovariateData <- function(connection, class(covariateData) <- "CovariateData" attr(class(covariateData), "package") <- "FeatureExtraction" return(covariateData) + } else{ + return(invisible(NULL)) } } diff --git a/inst/sql/sql_server/CreateExportTables.sql b/inst/sql/sql_server/CreateExportTables.sql new file mode 100644 index 00000000..73553baf --- /dev/null +++ b/inst/sql/sql_server/CreateExportTables.sql @@ -0,0 +1,59 @@ +CREATE TABLE @covariate_table ( + covariate_id BIGINT, + {@temporal}?{time_id BIGINT,} + {@aggregated}?{ + cohort_definition_id BIGINT, + sum_value BIGINT, + average_value FLOAT + }:{ + @row_id_field BIGINT, + covariate_value INT + } + ); + +CREATE TABLE @covariate_continuous_table ( + covariate_id BIGINT, + {@temporal}?{time_id BIGINT,} + 
{@aggregated}?{ + cohort_definition_id BIGINT, + count_value BIGINT, + min_value FLOAT, + 
max_value FLOAT, + average_value FLOAT, + standard_deviation FLOAT, + median_value FLOAT, + p10_value FLOAT, + p25_value FLOAT, + p75_value FLOAT, + p90_value FLOAT + }:{ + @row_id_field BIGINT, + covariate_value FLOAT + } + + ); + +CREATE TABLE @covariate_ref_table ( + covariate_id BIGINT, + covariate_name VARCHAR(512), + analysis_id INT, + concept_id INT, + value_as_concept_id INT, + collisions INT + ); + +CREATE TABLE @analysis_ref_table ( + analysis_id BIGINT, + analysis_name VARCHAR(512), + domain_id VARCHAR(20), + start_day INT, + end_day INT, + is_binary VARCHAR(1), + missing_means_zero VARCHAR(1) + ); + + +CREATE TABLE @time_ref_table ( + time_id BIGINT + ); \ No newline at end of file diff --git a/inst/sql/sql_server/DropExportTables.sql b/inst/sql/sql_server/DropExportTables.sql new file mode 100644 index 00000000..233e54a2 --- /dev/null +++ b/inst/sql/sql_server/DropExportTables.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS @covariate_table; +DROP TABLE IF EXISTS @covariate_continuous_table; +DROP TABLE IF EXISTS @covariate_ref_table; +DROP TABLE IF EXISTS @analysis_ref_table; +DROP TABLE IF EXISTS @time_ref_table; + diff --git a/man/getDbCohortBasedCovariatesData.Rd b/man/getDbCohortBasedCovariatesData.Rd index 933bcd07..e7e5e668 100644 --- a/man/getDbCohortBasedCovariatesData.Rd +++ b/man/getDbCohortBasedCovariatesData.Rd @@ -14,6 +14,11 @@ getDbCohortBasedCovariatesData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") @@ -54,6 +59,17 @@ is more than one period per person.} \code{\link{createCohortBasedCovariateSettings}} or \code{\link{createCohortBasedTemporalCovariateSettings}} functions.} +\item{targetCovariateTable}{(Optional) The name of the table where the 
resulting covariates will +be stored. If not provided, results will be fetched to R. The table can be +a permanent table in the \code{targetDatabaseSchema} or a temp table. If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + +\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} + +\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} + +\item{targetTimeRefTable}{(Optional) The name of the table for the time reference} + \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry?} @@ -64,6 +80,9 @@ on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} + +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. 
diff --git a/man/getDbCovariateData.Rd b/man/getDbCovariateData.Rd index 92aa19bb..b2b2fb59 100644 --- a/man/getDbCovariateData.Rd +++ b/man/getDbCovariateData.Rd @@ -17,6 +17,15 @@ getDbCovariateData( cohortIds = c(-1), rowIdField = "subject_id", covariateSettings, + exportToTable = FALSE, + createTable = exportToTable, + dropTableIfExists = exportToTable, + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), @@ -69,6 +78,35 @@ there is more than one period per person.} \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one of the createCovariate functions, or a list of such objects.} +\item{exportToTable}{Whether to export to a table rather than Andromeda object} + +\item{createTable}{Run sql to create table? Code does not check if table exists.} + +\item{dropTableIfExists}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +into existing table data. Overides createTable.} + +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored as a table. If not provided, results will be fetched to R.} + +\item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will +be stored. If not provided, results will be fetched to R. The table can be +a permanent table in the \code{targetDatabaseSchema} or a temp table. If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + +\item{targetCovariateContinuousTable}{(Optional) The name of the table where the resulting continuous covariates will +be stored. If not provided, results will be fetched to R. The table can be +a permanent table in the \code{targetDatabaseSchema} or a temp table. 
If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + +\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored. If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + +\item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored. If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + +\item{targetTimeRefTable}{(Optional) The name of the table for the time reference. If +it is a temp table, do not specify \code{targetDatabaseSchema}.} + \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry? If aggregated is set to FALSE, the results returned will be based on each subject_id and cohort_start_date in your cohort table. If your cohort diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index 65187c73..aa1ae71d 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -14,12 +14,11 @@ getDbDefaultCovariateData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - targetDatabaseSchema = NULL, targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, - targetTables = list(covariates = targetCovariateTable, covariateRef = - targetCovariateRefTable, analysisRef = targetAnalysisRefTable), + targetTimeRefTable = NULL, dropTableIfExists = FALSE, createTable = TRUE, aggregated = FALSE, @@ -61,26 +60,16 @@ is more than one period per person.} \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one of the createCovariate functions, or a list of such objects.} -\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored. 
If not provided, results will be fetched to R.} - \item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If -it is a temp table, do not specify \code{targetDatabaseSchema}. -Superseded by \code{targetTables}} +it is a temp table, do not specify \code{targetDatabaseSchema}.} -\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored. -Superseded by \code{targetTables}} +\item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} \item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} -\item{targetTables}{(Optional) list of mappings for table names. -The names of the table where the resulting covariates will be if -\code{targetDatabaseSchema} is specified. The tables will be created in permanent -table in the \code{targetDatabaseSchema} or as temporary tables. Tables that can be -included in this list: covariates, covariateRef, analysisRef, covariatesContinuous, -timeRef} +\item{targetTimeRefTable}{(Optional) The name of the table for the time reference} \item{dropTableIfExists}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged into existing table data. Overides createTable.} @@ -91,14 +80,15 @@ into existing table data. Overides createTable.} cohort entry?} \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This - will help reduce the file size of the characterization output, but will remove information - on covariates that have very low values. The default is 0. 
- - Superseded by \code{targetTables}} +will help reduce the file size of the characterization output, but will remove information +on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} + +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. From 5bf3b2acca4f73c39db70151e339a9abfe5e25a1 Mon Sep 17 00:00:00 2001 From: jreps Date: Wed, 18 Feb 2026 08:47:02 -0500 Subject: [PATCH 10/17] Update CreateExportTables.sql fixing time_ref columns --- inst/sql/sql_server/CreateExportTables.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/sql/sql_server/CreateExportTables.sql b/inst/sql/sql_server/CreateExportTables.sql index 73553baf..7065214e 100644 --- a/inst/sql/sql_server/CreateExportTables.sql +++ b/inst/sql/sql_server/CreateExportTables.sql @@ -55,5 +55,8 @@ CREATE TABLE @analysis_ref_table ( CREATE TABLE @time_ref_table ( - time_id BIGINT + time_part VARCHAR(20), + time_interval BIGINT, + sequence_start_day BIGINT, + sequence_end_day BIGINT ); \ No newline at end of file From 7e59471fb5c35eed329d4e737eaf0b9f30ac4d4f Mon Sep 17 00:00:00 2001 From: jreps Date: Fri, 20 Feb 2026 10:53:43 -0500 Subject: [PATCH 11/17] changing temp table insert --- R/GetCovariates.R | 211 +++++++++++------------ R/GetDefaultCovariates.R | 55 +----- inst/sql/sql_server/DropExportTables.sql | 10 ++ 3 files changed, 120 insertions(+), 156 deletions(-) diff --git a/R/GetCovariates.R b/R/GetCovariates.R index e055f36a..c3a03512 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -201,6 +201,105 @@ 
getDbCovariateData <- function(connectionDetails = NULL, }))) > 0 } + # Create export tables + # figure out tables + if (exportToTable) { + if(is.null(targetDatabaseSchema)){ + # turn off create table since the tables are temp + tempOutputTables <- TRUE + # covariate tables + if (substr(targetCovariateTable, 1, 1) == "#") { + targetCovariateTable <- targetCovariateTable + } else { + targetCovariateTable <- paste0("#", targetCovariateTable) + } + # cov cont table + if (substr(targetCovariateContinuousTable, 1, 1) == "#") { + targetCovariateContinuousTable <- targetCovariateContinuousTable + } else { + targetCovariateContinuousTable <- paste0("#", targetCovariateContinuousTable) + } + # cov ref table + if (substr(targetCovariateRefTable, 1, 1) == "#") { + targetCovariateRefTable <- targetCovariateRefTable + } else { + targetCovariateRefTable <- paste0("#", targetCovariateRefTable) + } + # analysis ref table + if (substr(targetAnalysisRefTable, 1, 1) == "#") { + targetAnalysisRefTable <- targetAnalysisRefTable + } else { + targetAnalysisRefTable <- paste0("#", targetAnalysisRefTable) + } + # time ref table + if (substr(targetTimeRefTable, 1, 1) == "#") { + targetTimeRefTable <- targetTimeRefTable + } else { + targetTimeRefTable <- paste0("#", targetTimeRefTable) + } + + } else { + tempOutputTables <- FALSE + targetCovariateTable <- paste(targetDatabaseSchema, targetCovariateTable, sep = ".") + targetCovariateContinuousTable <- paste(targetDatabaseSchema, targetCovariateContinuousTable, sep = ".") + targetCovariateRefTable <- paste(targetDatabaseSchema, targetCovariateRefTable, sep = ".") + targetAnalysisRefTable <- paste(targetDatabaseSchema, targetAnalysisRefTable, sep = ".") + targetTimeRefTable <- paste(targetDatabaseSchema, targetTimeRefTable, sep = ".") + } + + if(dropTableIfExists && !createTable){ # validate before anything is dropped so a bad call is not destructive + stop('Seem to be exporting to tables but create table is FALSE and dropTable is TRUE') + 
} + + # drop table if required + if(dropTableIfExists){ + message('Dropping export tables') + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = 'DropExportTables.sql', + packageName = 
'FeatureExtraction', + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + temp_tables = tempOutputTables, + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + # create the cohort tables if required + if(createTable){ + message('Creating export tables') + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = 'CreateExportTables.sql', + packageName = 'FeatureExtraction', + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + + aggregated = aggregated, + temporal = anyTemporal, + row_id_field = 'row_id', + + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + } sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? 
{WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;" sql <- SqlRender::render( @@ -240,114 +339,6 @@ getDbCovariateData <- function(connectionDetails = NULL, ) } - # figure out tables - if (exportToTable) { - if(is.null(targetDatabaseSchema)){ - # turn off create table since the tables are temp - if(createTable){ - warning('Turning off createTable since no targetDatabaseSchema so output tables are temp') - } - createTable <- FALSE - tempOutputTables <- TRUE - # covariate tables - if (substr(targetCovariateTable, 1, 1) == "#") { - targetCovariateTable <- targetCovariateTable - } else { - targetCovariateTable <- paste0("#", targetCovariateTable) - } - # cov cont table - if (substr(targetCovariateContinuousTable, 1, 1) == "#") { - targetCovariateContinuousTable <- targetCovariateContinuousTable - } else { - targetCovariateContinuousTable <- paste0("#", targetCovariateContinuousTable) - } - # cov ref table - if (substr(targetCovariateRefTable, 1, 1) == "#") { - targetCovariateRefTable <- targetCovariateRefTable - } else { - targetCovariateRefTable <- paste0("#", targetCovariateRefTable) - } - # analysis ref table - if (substr(targetAnalysisRefTable, 1, 1) == "#") { - targetAnalysisRefTable <- targetAnalysisRefTable - } else { - targetAnalysisRefTable <- paste0("#", targetAnalysisRefTable) - } - # time ref table - if (substr(targetTimeRefTable, 1, 1) == "#") { - targetTimeRefTable <- targetTimeRefTable - } else { - targetTimeRefTable <- paste0("#", targetTimeRefTable) - } - - } else { - tempOutputTables <- FALSE - targetCovariateTable <- paste(targetDatabaseSchema, targetCovariateTable, sep = ".") - targetCovariateContinuousTable <- paste(targetDatabaseSchema, targetCovariateContinuousTable, sep = ".") - targetCovariateRefTable <- paste(targetDatabaseSchema, targetCovariateRefTable, sep = ".") - targetAnalysisRefTable <- paste(targetDatabaseSchema, targetAnalysisRefTable, sep = ".") - targetTimeRefTable <- paste(targetDatabaseSchema, 
targetTimeRefTable, sep = ".") - } - } - - - # drop table if required - if(dropTableIfExists & exportToTable){ - message('Dropping export tables') - sql <- SqlRender::loadRenderTranslateSql( - sqlFilename = 'DropExportTables.sql', - packageName = 'FeatureExtraction', - dbms = attr(connection, "dbms"), - tempEmulationSchema = tempEmulationSchema, - temp_table = tempOutputTables, - covariate_table = targetCovariateTable, - covariate_continuous_table = targetCovariateContinuousTable, - covariate_ref_table = targetCovariateRefTable, - analysis_ref_table = targetAnalysisRefTable, - time_ref_table = targetTimeRefTable - ) - - DatabaseConnector::executeSql( - connection = connection, - sql = sql - ) - } - - if(dropTableIfExists & !createTable & exportToTable){ - if(!tempOutputTables){ - stop('Seem to be exporting to non-temp tables but create table is FALSE') - } - } - - # create the cohort tables if required - if(createTable & exportToTable ){ - if(!tempOutputTables){ - message('Creating export tables') - sql <- SqlRender::loadRenderTranslateSql( - sqlFilename = 'CreateExportTables.sql', - packageName = 'FeatureExtraction', - dbms = attr(connection, "dbms"), - tempEmulationSchema = tempEmulationSchema, - - aggregated = aggregated, - temporal = anyTemporal, - row_id_field = 'row_id', - - covariate_table = targetCovariateTable, - covariate_continuous_table = targetCovariateContinuousTable, - covariate_ref_table = targetCovariateRefTable, - analysis_ref_table = targetAnalysisRefTable, - time_ref_table = targetTimeRefTable - ) - - DatabaseConnector::executeSql( - connection = connection, - sql = sql - ) - } - } - - for (i in 1:length(covariateSettings)) { fun <- attr(covariateSettings[[i]], "fun") args <- list( @@ -397,7 +388,7 @@ getDbCovariateData <- function(connectionDetails = NULL, Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) } - if(is.null(targetDatabaseSchema)){ + if(!exportToTable){ for (name in names(attr(tempCovariateData, 
"metaData"))) { if (is.null(attr(covariateData, "metaData")[[name]])) { attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]] @@ -410,7 +401,7 @@ getDbCovariateData <- function(connectionDetails = NULL, ) } } - } # if NULL target schema + } # if not exporting } } } diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 0a701d51..01d9bfcf 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -181,13 +181,6 @@ getDbDefaultCovariateData <- function(connection, #print(todo$sqlQueryFeatures) sql <- " - {@temp_tables}?{ - - SELECT * INTO @target_covariate_table - FROM (@sub_query) main_table; - - }:{ - INSERT INTO @target_covariate_table( {@temporal | @temporal_sequence} ? {time_id,} @@ -203,8 +196,7 @@ getDbDefaultCovariateData <- function(connection, covariate_value } - ) @sub_query; - } " + ) @sub_query; " sql <- SqlRender::render( sql = sql, @@ -246,14 +238,6 @@ getDbDefaultCovariateData <- function(connection, ) } else{ sql <- " - - {@temp_tables}?{ - - SELECT * INTO @target_covariate_continuous_table - FROM (@sub_query) main_table; - - }:{ - INSERT INTO @target_covariate_continuous_table( {@aggregated}?{ @@ -280,8 +264,7 @@ getDbDefaultCovariateData <- function(connection, } - ) @sub_query; - }" + ) @sub_query;" sql <- SqlRender::render( sql = sql, @@ -322,13 +305,6 @@ getDbDefaultCovariateData <- function(connection, ) } else{ sql <- " - {@temp_tables}?{ - - SELECT * INTO @target_covariate_ref_table - FROM (@sub_query) main_table; - - }:{ - INSERT INTO @target_covariate_ref_table( covariate_id, covariate_name, @@ -336,8 +312,7 @@ getDbDefaultCovariateData <- function(connection, concept_id, value_as_concept_id, collisions - ) @sub_query ; - }" + ) @sub_query ;" sql <- SqlRender::render( sql = sql, @@ -378,13 +353,6 @@ getDbDefaultCovariateData <- function(connection, ) } else{ sql <- " - {@temp_tables}?{ - - SELECT * INTO @target_analysis_ref_table - FROM (@sub_query) main_table; - - }:{ - INSERT 
INTO @target_analysis_ref_table( analysis_id, analysis_name, @@ -395,8 +363,7 @@ getDbDefaultCovariateData <- function(connection, } is_binary, missing_means_zero - ) @sub_query ; - }" + ) @sub_query ;" sql <- SqlRender::render( sql = sql, @@ -437,16 +404,12 @@ getDbDefaultCovariateData <- function(connection, } else{ # TODO - what columns are in time ref table?! sql <- " - {@temp_tables}?{ - - SELECT * INTO @target_time_ref_table - FROM (@sub_query) main_table; - - }:{ INSERT INTO @target_time_ref_table( - time_id - ) @sub_query; - } " + time_part, + time_interval, + sequence_start_day, + sequence_end_day + ) @sub_query;" sql <- SqlRender::render( sql = sql, diff --git a/inst/sql/sql_server/DropExportTables.sql b/inst/sql/sql_server/DropExportTables.sql index 233e54a2..9b910341 100644 --- a/inst/sql/sql_server/DropExportTables.sql +++ b/inst/sql/sql_server/DropExportTables.sql @@ -1,6 +1,16 @@ +{@temp_tables}?{ +IF OBJECT_ID('tempdb..@covariate_table', 'U') IS NOT NULL DROP TABLE @covariate_table; +IF OBJECT_ID('tempdb..@covariate_continuous_table', 'U') IS NOT NULL DROP TABLE @covariate_continuous_table; +IF OBJECT_ID('tempdb..@covariate_ref_table', 'U') IS NOT NULL DROP TABLE @covariate_ref_table; +IF OBJECT_ID('tempdb..@analysis_ref_table', 'U') IS NOT NULL DROP TABLE @analysis_ref_table; +IF OBJECT_ID('tempdb..@time_ref_table', 'U') IS NOT NULL DROP TABLE @time_ref_table; +}:{ DROP TABLE IF EXISTS @covariate_table; DROP TABLE IF EXISTS @covariate_continuous_table; DROP TABLE IF EXISTS @covariate_ref_table; DROP TABLE IF EXISTS @analysis_ref_table; DROP TABLE IF EXISTS @time_ref_table; +} + + From bd9d31f3f7053ac54bb414f5fb579466750a1999 Mon Sep 17 00:00:00 2001 From: jreps Date: Fri, 20 Feb 2026 12:39:45 -0500 Subject: [PATCH 12/17] Update GetDefaultCovariates.R --- R/GetDefaultCovariates.R | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 01d9bfcf..f62d8795 100644 --- 
a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -200,7 +200,6 @@ getDbDefaultCovariateData <- function(connection, sql <- SqlRender::render( sql = sql, - temp_tables = allTempTables, target_covariate_table = targetTables$covariates, sub_query = gsub(";", "", todo$sqlQueryFeatures), temporal = covariateSettings$temporal, @@ -268,7 +267,6 @@ getDbDefaultCovariateData <- function(connection, sql <- SqlRender::render( sql = sql, - temp_tables = allTempTables, target_covariate_continuous_table = targetTables$covariatesContinuous, sub_query = gsub(";", "", todo$sqlQueryContinuousFeatures), temporal = covariateSettings$temporal, @@ -316,11 +314,8 @@ getDbDefaultCovariateData <- function(connection, sql <- SqlRender::render( sql = sql, - temp_tables = allTempTables, target_covariate_ref_table = targetTables$covariateRef, - sub_query = gsub(";", "", todo$sqlQueryFeatureRef), - temporal = covariateSettings$temporal, - temporal_sequence = covariateSettings$temporalSequence + sub_query = gsub(";", "", todo$sqlQueryFeatureRef) ) sql <- SqlRender::translate( @@ -367,7 +362,6 @@ getDbDefaultCovariateData <- function(connection, sql <- SqlRender::render( sql = sql, - temp_tables = allTempTables, target_analysis_ref_table = targetTables$analysisRef, sub_query = gsub(";", "", todo$sqlQueryAnalysisRef), temporal = covariateSettings$temporal | covariateSettings$temporalSequence @@ -413,10 +407,8 @@ getDbDefaultCovariateData <- function(connection, sql <- SqlRender::render( sql = sql, - temp_tables = allTempTables, target_covariate_ref_table = targetTables$timeRef, - sub_query = gsub(";", "", todo$sqlQueryTimeRef), - temporal = covariateSettings$temporal | covariateSettings$temporalSequence + sub_query = gsub(";", "", todo$sqlQueryTimeRef) ) sql <- SqlRender::translate( From 7bed36519ce2364d97e1975118a928d4bf18c750 Mon Sep 17 00:00:00 2001 From: jreps Date: Mon, 23 Feb 2026 08:38:40 -0500 Subject: [PATCH 13/17] Update DemographicsTime.sql explicitly specifying where the 
observation_period_ columns come from just in case the cohort table has these are columns --- inst/sql/sql_server/DemographicsTime.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/sql/sql_server/DemographicsTime.sql b/inst/sql/sql_server/DemographicsTime.sql index c509b327..40763489 100644 --- a/inst/sql/sql_server/DemographicsTime.sql +++ b/inst/sql/sql_server/DemographicsTime.sql @@ -36,20 +36,20 @@ FROM ( cohort.@row_id_field AS row_id, } {@sub_type == 'priorObservation'} ? { - DATEDIFF(DAY, observation_period_start_date, cohort_start_date) AS days + DATEDIFF(DAY, op.observation_period_start_date, cohort_start_date) AS days } {@sub_type == 'postObservation'} ? { - DATEDIFF(DAY, cohort_start_date, observation_period_end_date) AS days + DATEDIFF(DAY, cohort_start_date, op.observation_period_end_date) AS days } {@sub_type == 'inCohort'} ? { DATEDIFF(DAY, cohort_start_date, cohort_end_date) AS days } FROM @cohort_table cohort {@sub_type != 'inCohort'} ? { - INNER JOIN @cdm_database_schema.observation_period + INNER JOIN @cdm_database_schema.observation_period op ON cohort.subject_id = observation_period.person_id - AND observation_period_start_date <= cohort_start_date - AND observation_period_end_date >= cohort_start_date + AND op.observation_period_start_date <= cohort_start_date + AND op.observation_period_end_date >= cohort_start_date } {@cohort_definition_id != -1} ? 
{ WHERE cohort.cohort_definition_id IN (@cohort_definition_id)} ) raw_data; From 30c243dc2ad2f9abc287810d1c773d2750e99f9e Mon Sep 17 00:00:00 2001 From: jreps Date: Mon, 23 Feb 2026 09:01:36 -0500 Subject: [PATCH 14/17] Update DemographicsTime.sql missed period_id --- inst/sql/sql_server/DemographicsTime.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/sql/sql_server/DemographicsTime.sql b/inst/sql/sql_server/DemographicsTime.sql index 40763489..38d6784a 100644 --- a/inst/sql/sql_server/DemographicsTime.sql +++ b/inst/sql/sql_server/DemographicsTime.sql @@ -47,7 +47,7 @@ FROM ( FROM @cohort_table cohort {@sub_type != 'inCohort'} ? { INNER JOIN @cdm_database_schema.observation_period op - ON cohort.subject_id = observation_period.person_id + ON op.person_id = cohort.subject_id AND op.observation_period_start_date <= cohort_start_date AND op.observation_period_end_date >= cohort_start_date } From 4d8167db486b817dec56034228b8fb2a473fd955 Mon Sep 17 00:00:00 2001 From: jreps Date: Mon, 23 Feb 2026 09:29:29 -0500 Subject: [PATCH 15/17] Update GetDefaultCovariates.R fixing oracleTempSchema being used --- R/GetDefaultCovariates.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index a6d17901..6a7551ea 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -164,7 +164,7 @@ getDbDefaultCovariateData <- function(connection, if(extractToAndromeda){ sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + tempEmulationSchema = tempEmulationSchema) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, @@ -224,7 +224,7 @@ getDbDefaultCovariateData <- function(connection, if(extractToAndromeda){ sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + 
tempEmulationSchema = tempEmulationSchema) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, @@ -290,7 +290,7 @@ getDbDefaultCovariateData <- function(connection, if(extractToAndromeda){ sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + tempEmulationSchema = tempEmulationSchema) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, @@ -335,7 +335,7 @@ getDbDefaultCovariateData <- function(connection, if(extractToAndromeda){ sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + tempEmulationSchema = tempEmulationSchema) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, @@ -384,7 +384,7 @@ getDbDefaultCovariateData <- function(connection, if(extractToAndromeda){ sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + tempEmulationSchema = tempEmulationSchema) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, From 9e5a49e7e5d7e1f4505e5bf7a1dadc9ff7d013f4 Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Thu, 26 Feb 2026 07:53:57 -0500 Subject: [PATCH 16/17] Update roxygen2 comments, unit tests and fix errors (#322) * Updates to remove unnecessary tests, add back collision detection and to properly pass off parameters to covariate builder functions * Update docs * Fix parameter documentation and example --- R/GetCovariates.R | 6 -- R/GetCovariatesFromOtherCohorts.R | 2 + R/GetDefaultCovariates.R | 27 +++---- R/UnitTestHelperFunctions.R | 4 +- man/dot-getDbLooCovariateData.Rd | 5 +- man/getDbCohortBasedCovariatesData.Rd | 9 ++- man/getDbDefaultCovariateData.Rd | 20 ++--- tests/testthat/setup.R | 15 +++- tests/testthat/test-GetDefaultCovariates.R | 90 ---------------------- 9 files changed, 50 insertions(+), 128 
deletions(-) diff --git a/R/GetCovariates.R b/R/GetCovariates.R index c3a03512..f2bf40f5 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -136,7 +136,6 @@ getDbCovariateData <- function(connectionDetails = NULL, cohortIds = c(-1), rowIdField = "subject_id", covariateSettings, - exportToTable = FALSE, createTable = exportToTable, dropTableIfExists = exportToTable, @@ -146,7 +145,6 @@ getDbCovariateData <- function(connectionDetails = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), @@ -350,15 +348,11 @@ getDbCovariateData <- function(connectionDetails = NULL, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = covariateSettings[[i]], - targetCovariateTable = targetCovariateTable, targetCovariateContinuousTable = targetCovariateContinuousTable, targetCovariateRefTable = targetCovariateRefTable, targetAnalysisRefTable = targetAnalysisRefTable, targetTimeRefTable = targetTimeRefTable, - dropTableIfExists = FALSE, # can remove this input - createTable = FALSE, # can remove this input - aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) diff --git a/R/GetCovariatesFromOtherCohorts.R b/R/GetCovariatesFromOtherCohorts.R index 6b6b7952..f295d77f 100644 --- a/R/GetCovariatesFromOtherCohorts.R +++ b/R/GetCovariatesFromOtherCohorts.R @@ -29,6 +29,7 @@ #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. #' +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. #' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. 
@@ -48,6 +49,7 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 6a7551ea..6243635e 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -29,7 +29,7 @@ #' be stored. If not provided, results will be fetched to R. The table can be #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. -#' +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. #' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. @@ -37,11 +37,6 @@ #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. -#' -#' -#' @param dropTableIfExists If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged -#' into existing table data. Overides createTable. -#' @param createTable Run sql to create table? Code does not check if table exists. 
#' @template GetCovarParams #' #' @examples @@ -59,9 +54,7 @@ #' connection = connection, #' cdmDatabaseSchema = "main", #' cohortTable = "cohort", -#' covariateSettings = createDefaultCovariateSettings(), -#' targetDatabaseSchema = "main", -#' targetCovariateTable = "ut_cov" +#' covariateSettings = createDefaultCovariateSettings() #' ) #' } #' @export @@ -74,15 +67,12 @@ getDbDefaultCovariateData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - - dropTableIfExists = FALSE, - createTable = TRUE, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { @@ -298,6 +288,17 @@ getDbDefaultCovariateData <- function(connection, andromedaTableName = "covariateRef", snakeCaseToCamelCase = TRUE ) + + collisions <- covariateData$covariateRef %>% + dplyr::filter(collisions > 0) %>% + dplyr::collect() + + if (nrow(collisions) > 0) { + warning(sprintf( + "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s", + paste(collisions$covariateId, paste = ", ") + )) + } } else{ sql <- " INSERT INTO @target_covariate_ref_table( diff --git a/R/UnitTestHelperFunctions.R b/R/UnitTestHelperFunctions.R index 205b598b..d2bce2b1 100644 --- a/R/UnitTestHelperFunctions.R +++ b/R/UnitTestHelperFunctions.R @@ -59,6 +59,7 @@ #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. +#' @param ... Additional arguments, not used. #' @return #' Returns an object of type \code{covariateData}, containing information on the covariates. 
#' @@ -71,7 +72,8 @@ rowIdField = "subject_id", covariateSettings, aggregated = FALSE, - minCharacterizationMean = 0) { + minCharacterizationMean = 0, + ...) { writeLines("Constructing length of observation covariates") if (covariateSettings$useLengthOfObs == FALSE) { return(NULL) diff --git a/man/dot-getDbLooCovariateData.Rd b/man/dot-getDbLooCovariateData.Rd index ab94c1af..7524281f 100644 --- a/man/dot-getDbLooCovariateData.Rd +++ b/man/dot-getDbLooCovariateData.Rd @@ -14,7 +14,8 @@ rowIdField = "subject_id", covariateSettings, aggregated = FALSE, - minCharacterizationMean = 0 + minCharacterizationMean = 0, + ... ) } \arguments{ @@ -54,6 +55,8 @@ cohort entry?} \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This will help reduce the file size of the characterization output, but will remove information on covariates that have very low values. The default is 0.} + +\item{...}{Additional arguments, not used.} } \value{ Returns an object of type \code{covariateData}, containing information on the covariates. diff --git a/man/getDbCohortBasedCovariatesData.Rd b/man/getDbCohortBasedCovariatesData.Rd index e7e5e668..510934bb 100644 --- a/man/getDbCohortBasedCovariatesData.Rd +++ b/man/getDbCohortBasedCovariatesData.Rd @@ -14,6 +14,7 @@ getDbCohortBasedCovariatesData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, @@ -59,11 +60,16 @@ is more than one period per person.} \code{\link{createCohortBasedCovariateSettings}} or \code{\link{createCohortBasedTemporalCovariateSettings}} functions.} +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. 
If not provided, results will be fetched to R.} + \item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If it is a temp table, do not specify \code{targetDatabaseSchema}.} +\item{targetCovariateContinuousTable}{(Optional) The name of the table where the resulting continuous covariates should be stored.} + \item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} \item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} @@ -80,9 +86,6 @@ on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} - -\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. 
diff --git a/man/getDbDefaultCovariateData.Rd b/man/getDbDefaultCovariateData.Rd index aa1ae71d..99dda8c9 100644 --- a/man/getDbDefaultCovariateData.Rd +++ b/man/getDbDefaultCovariateData.Rd @@ -14,13 +14,12 @@ getDbDefaultCovariateData( cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, targetCovariateTable = NULL, targetCovariateContinuousTable = NULL, targetCovariateRefTable = NULL, targetAnalysisRefTable = NULL, targetTimeRefTable = NULL, - dropTableIfExists = FALSE, - createTable = TRUE, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") @@ -60,22 +59,22 @@ is more than one period per person.} \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one of the createCovariate functions, or a list of such objects.} +\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.} + \item{targetCovariateTable}{(Optional) The name of the table where the resulting covariates will be stored. If not provided, results will be fetched to R. The table can be a permanent table in the \code{targetDatabaseSchema} or a temp table. If it is a temp table, do not specify \code{targetDatabaseSchema}.} +\item{targetCovariateContinuousTable}{(Optional) The name of the table where the resulting continuous covariates should be stored.} + \item{targetCovariateRefTable}{(Optional) The name of the table where the covariate reference will be stored.} \item{targetAnalysisRefTable}{(Optional) The name of the table where the analysis reference will be stored.} \item{targetTimeRefTable}{(Optional) The name of the table for the time reference} -\item{dropTableIfExists}{If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged -into existing table data. Overides createTable.} - -\item{createTable}{Run sql to create table? 
Code does not check if table exists.} - \item{aggregated}{Should aggregate statistics be computed instead of covariates per cohort entry?} @@ -86,9 +85,6 @@ on covariates that have very low values. The default is 0.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} - -\item{targetDatabaseSchema}{(Optional) The name of the database schema where the resulting covariates -should be stored. If not provided, results will be fetched to R.} } \value{ Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. @@ -128,9 +124,7 @@ results <- getDbDefaultCovariateData( connection = connection, cdmDatabaseSchema = "main", cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - targetCovariateTable = "ut_cov" + covariateSettings = createDefaultCovariateSettings() ) } } diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dd59bcfa..2007a292 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -2,6 +2,19 @@ library(testthat) library(FeatureExtraction) library(dplyr) +# AGS: This rJava code block was used to add the Java dependencies to the classpath for testing. It is needed to run the tests on all platforms, but it is not needed when running individual test files in RStudio, which is why it is not included in the helper functions file. If we want to run individual test files, we can use the loadRenderTranslateUnitTestSql function defined below, which also adds the Java dependencies to the classpath if they are not already there. 
+# library(rJava) +# .jinit() +# jar_dirs <- c( +# system.file("java", package = "DatabaseConnector"), +# system.file("java", package = "SqlRender"), +# system.file("java", package = "FeatureExtraction") +# ) +# jar_files <- unlist( +# lapply(jar_dirs, list.files, pattern = "\\.jar$", full.names = TRUE) +# ) +# .jaddClassPath(jar_files) + dbms <- getOption("dbms", default = "sqlite") message("************* Testing on ", dbms, " *************\n") @@ -98,7 +111,7 @@ checkRemoteFileAvailable <- function(remoteFile) { } # Then stop if status > 400 if (httr::http_error(resp)) { - message_for_status(resp) + httr::message_for_status(resp) return(NULL) } return("success") diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index 8c674acd..44118277 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -42,93 +42,3 @@ test_that("Test exit conditions", { )) }) -test_that("Test target table", { - skip_on_cran() - skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = "main", - targetTables = list(covariates = "ut_cov", - covariateRef = "ut_cov_ref", - analysisRef = "ut_cov_analysis_ref")) - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref")[1], 1) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetDatabaseSchema = 
"main", - aggregated = TRUE, - targetTables = list(covariates = "ut_cov_agg", - covariateRef = "ut_cov_ref_agg", - analysisRef = "ut_cov_analysis_ref_agg")) - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM main.ut_cov_analysis_ref_agg")[1], 1) - - # Temp tables with old prototype - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - aggregated = TRUE, - targetCovariateTable = "#ut_cov_agg", - targetAnalysisRefTable = "#ut_cov_ref_agg", - targetCovariateRefTable = "#ut_cov_anal_ref_agg") - - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref_agg")[1], 1) - expect_gt(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_anal_ref_agg")[1], 1) - - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - covCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1] - expect_gt(covCt, 1) - covRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1] - expect_gt(covRefCt, 1) - anlRefCt <- DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1] - 
expect_gt(anlRefCt, 1) - - # append results rather than deleting the tables - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - createTable = FALSE, - dropTableIfExists = FALSE, - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt * 2) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt * 2) - - # Recreate tables (and check create override works) - results <- getDbDefaultCovariateData(connection = eunomiaConnection, - cdmDatabaseSchema = "main", - cohortTable = "cohort", - covariateSettings = createDefaultCovariateSettings(), - createTable = FALSE, - dropTableIfExists = TRUE, - targetCovariateTable = "#ut_cov", - targetAnalysisRefTable = "#ut_cov_ref", - targetCovariateRefTable = "#ut_cov_analysis_ref") - - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov")[1], covCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_ref")[1], covRefCt) - expect_equal(DatabaseConnector::renderTranslateQuerySql(eunomiaConnection, "SELECT COUNT(*) FROM #ut_cov_analysis_ref")[1], anlRefCt) -}) From 059329766a32c1f25bacba5024688c133c7424ac Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Sat, 28 Feb 2026 08:57:13 -0500 Subject: [PATCH 17/17] Bump version and date in description and update news.md --- DESCRIPTION | 4 ++-- NEWS.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 
8092ca01..9ceb6d45 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: FeatureExtraction Type: Package Title: Generating Features for a Cohort -Version: 3.12.0 -Date: 2025-10-28 +Version: 3.13.0 +Date: 2026-02-28 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")), person("Marc", "Suchard", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index af779029..9bdbe657 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,8 +3,8 @@ FeatureExtraction 3.13.0 New Features: -- Added ability to store aggregate results from `getDbDefaultCovariateData` in the database and added -ability to control all target tables with new `targetTables` list parameter +- Added ability to store aggregate results from `getDbCovariateData` in the database and added +ability to control all target tables with new `target*Table` parameters (#152, #321) Bugfixes: