diff --git a/.Rbuildignore b/.Rbuildignore index a152430..def897d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ pom.xml extras docs diff --git a/DESCRIPTION b/DESCRIPTION index 4c1e6a3..521f730 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: FeatureExtraction Type: Package Title: Generating Features for a Cohort -Version: 3.12.0 -Date: 2025-10-28 +Version: 3.13.0 +Date: 2026-03-05 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")), person("Marc", "Suchard", role = c("aut")), @@ -46,6 +46,6 @@ VignetteBuilder: knitr URL: https://github.com/OHDSI/FeatureExtraction BugReports: https://github.com/OHDSI/FeatureExtraction/issues NeedsCompilation: no -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Encoding: UTF-8 Language: en-US diff --git a/NEWS.md b/NEWS.md index 6ddbdc4..9bdbe65 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +FeatureExtraction 3.13.0 +======================= + +New Features: + +- Added ability to store aggregate results from `getDbCovariateData` in the database and added +ability to control all target tables with new `target*Table` parameters (#152, #321) + +Bugfixes: + +- Fixed tests and made sure storage of covariates with `getDbDefaultCovariateData` works and is consistent + FeatureExtraction 3.12.0 ======================= diff --git a/R/Aggregation.R b/R/Aggregation.R index 53a48e6..afb70f4 100644 --- a/R/Aggregation.R +++ b/R/Aggregation.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/CompareCohorts.R b/R/CompareCohorts.R index fffa07f..c1f7beb 100644 --- a/R/CompareCohorts.R +++ b/R/CompareCohorts.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction 
# diff --git a/R/CovariateData.R b/R/CovariateData.R index 3a0dfff..772ac09 100644 --- a/R/CovariateData.R +++ b/R/CovariateData.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/DefaultCovariateSettings.R b/R/DefaultCovariateSettings.R index 4a45f71..b5acf68 100644 --- a/R/DefaultCovariateSettings.R +++ b/R/DefaultCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/DefaultTemporalCovariateSettings.R b/R/DefaultTemporalCovariateSettings.R index 5d1004b..c0bba9b 100644 --- a/R/DefaultTemporalCovariateSettings.R +++ b/R/DefaultTemporalCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/DefaultTemporalSequenceCovariateSettings.R b/R/DefaultTemporalSequenceCovariateSettings.R index aac1c59..e40d297 100644 --- a/R/DefaultTemporalSequenceCovariateSettings.R +++ b/R/DefaultTemporalSequenceCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/DetailedCovariateSettings.R b/R/DetailedCovariateSettings.R index 2ea8132..92d6dd8 100644 --- a/R/DetailedCovariateSettings.R +++ b/R/DetailedCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/FeatureExtraction.R b/R/FeatureExtraction.R index 7132f6c..6346da6 
100644 --- a/R/FeatureExtraction.R +++ b/R/FeatureExtraction.R @@ -1,6 +1,6 @@ # @file FeatureExtraction.R # -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/GetCovariates.R b/R/GetCovariates.R index cf2793c..94b55c0 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -72,6 +72,29 @@ #' @param covariateCohortDatabaseSchema The database schema where the cohorts used to define the covariates can be found. #' @param covariateCohortTable The table where the cohorts used to define the covariates can be found. #' +#' @param exportToTable Whether to export to a table rather than Andromeda object +#' @param dropTableIfExists If TRUE, drop any existing target tables. Otherwise, results are merged +#' into existing table data. Overrides createTable. +#' @param createTable Run sql to create table? Code does not check if table exists. +#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates +#' should be stored as a table. If not provided, results will be fetched to R. +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates will +#' be stored. If not provided, results will be fetched to R. The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. 
If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' +#' #' @return #' Returns an object of type \code{covariateData}, containing information on the covariates. #' @@ -113,6 +136,15 @@ getDbCovariateData <- function(connectionDetails = NULL, cohortIds = c(-1), rowIdField = "subject_id", covariateSettings, + exportToTable = FALSE, + createTable = exportToTable, + dropTableIfExists = exportToTable, + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), @@ -155,6 +187,115 @@ getDbCovariateData <- function(connectionDetails = NULL, } else { cohortDatabaseSchemaTable <- paste(cohortDatabaseSchema, cohortTable, sep = ".") } + + # check for temporal features in any of the settings + if (inherits(covariateSettings, "covariateSettings")) { + anyTemporal <- covariateSettings$temporal | covariateSettings$temporalSequence + } else { + anyTemporal <- sum(unlist(lapply( + X = covariateSettings, + FUN = function(x) { + sum(c(x$temporal, x$temporalSequence)) == 1 + } + ))) > 0 + } + + # Create export tables + # figure out tables + if (exportToTable) { + if (is.null(targetDatabaseSchema)) { + # turn off create table since the tables are temp + tempOutputTables <- TRUE + # covariate 
tables + if (substr(targetCovariateTable, 1, 1) == "#") { + targetCovariateTable <- targetCovariateTable + } else { + targetCovariateTable <- paste0("#", targetCovariateTable) + } + # cov cont table + if (substr(targetCovariateContinuousTable, 1, 1) == "#") { + targetCovariateContinuousTable <- targetCovariateContinuousTable + } else { + targetCovariateContinuousTable <- paste0("#", targetCovariateContinuousTable) + } + # cov ref table + if (substr(targetCovariateRefTable, 1, 1) == "#") { + targetCovariateRefTable <- targetCovariateRefTable + } else { + targetCovariateRefTable <- paste0("#", targetCovariateRefTable) + } + # analysis ref table + if (substr(targetAnalysisRefTable, 1, 1) == "#") { + targetAnalysisRefTable <- targetAnalysisRefTable + } else { + targetAnalysisRefTable <- paste0("#", targetAnalysisRefTable) + } + # time ref table + if (substr(targetTimeRefTable, 1, 1) == "#") { + targetTimeRefTable <- targetTimeRefTable + } else { + targetTimeRefTable <- paste0("#", targetTimeRefTable) + } + } else { + tempOutputTables <- FALSE + targetCovariateTable <- paste(targetDatabaseSchema, targetCovariateTable, sep = ".") + targetCovariateContinuousTable <- paste(targetDatabaseSchema, targetCovariateContinuousTable, sep = ".") + targetCovariateRefTable <- paste(targetDatabaseSchema, targetCovariateRefTable, sep = ".") + targetAnalysisRefTable <- paste(targetDatabaseSchema, targetAnalysisRefTable, sep = ".") + targetTimeRefTable <- paste(targetDatabaseSchema, targetTimeRefTable, sep = ".") + } + + # drop table if required + if (dropTableIfExists) { + message("Dropping export tables") + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "DropExportTables.sql", + packageName = "FeatureExtraction", + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + temp_tables = tempOutputTables, + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = 
targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + + if (dropTableIfExists & !createTable) { + stop("Seem to be exporting to tables but create table is FALSE and dropTable is TRUE") + } + + # create the cohort tables if required + if (createTable) { + message("Creating export tables") + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateExportTables.sql", + packageName = "FeatureExtraction", + dbms = attr(connection, "dbms"), + tempEmulationSchema = tempEmulationSchema, + aggregated = aggregated, + temporal = anyTemporal, + row_id_field = "row_id", + covariate_table = targetCovariateTable, + covariate_continuous_table = targetCovariateContinuousTable, + covariate_ref_table = targetCovariateRefTable, + analysis_ref_table = targetAnalysisRefTable, + time_ref_table = targetTimeRefTable + ) + + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } + sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? 
{WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;" sql <- SqlRender::render( sql = sql, @@ -192,6 +333,7 @@ getDbCovariateData <- function(connectionDetails = NULL, covariateCohortTable ) } + for (i in 1:length(covariateSettings)) { fun <- attr(covariateSettings[[i]], "fun") args <- list( @@ -203,6 +345,11 @@ getDbCovariateData <- function(connectionDetails = NULL, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = covariateSettings[[i]], + targetCovariateTable = targetCovariateTable, + targetCovariateContinuousTable = targetCovariateContinuousTable, + targetCovariateRefTable = targetCovariateRefTable, + targetAnalysisRefTable = targetAnalysisRefTable, + targetTimeRefTable = targetTimeRefTable, aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) @@ -225,25 +372,35 @@ getDbCovariateData <- function(connectionDetails = NULL, covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous } - Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef) - Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) - for (name in names(attr(tempCovariateData, "metaData"))) { - if (is.null(attr(covariateData, "metaData")[[name]])) { - attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]] - } else { - attr(covariateData, "metaData")[[name]] <- list( - c( - unlist(attr(covariateData, "metaData")[[name]]), - attr(tempCovariateData, "metaData")[[name]] + if (hasData(tempCovariateData$covariateRef)) { + Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef) + } + if (hasData(tempCovariateData$analysisRef)) { + Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) + } + + if (!exportToTable) { + for (name in names(attr(tempCovariateData, "metaData"))) { + if (is.null(attr(covariateData, "metaData")[[name]])) { + attr(covariateData, "metaData")[[name]] <- 
attr(tempCovariateData, "metaData")[[name]] + } else { + attr(covariateData, "metaData")[[name]] <- list( + c( + unlist(attr(covariateData, "metaData")[[name]]), + attr(tempCovariateData, "metaData")[[name]] + ) ) - ) + } } - } + } # if not exporting } } } - attr(covariateData, "metaData")$populationSize <- populationSize - attr(covariateData, "metaData")$cohortIds <- cohortIds + + if (!is.null(covariateData)) { + attr(covariateData, "metaData")$populationSize <- populationSize + attr(covariateData, "metaData")$cohortIds <- cohortIds + } } - return(covariateData) + return(invisible(covariateData)) } diff --git a/R/GetCovariatesFromCohortAttributes.R b/R/GetCovariatesFromCohortAttributes.R index 47b3b58..9e265f9 100644 --- a/R/GetCovariatesFromCohortAttributes.R +++ b/R/GetCovariatesFromCohortAttributes.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/GetCovariatesFromOtherCohorts.R b/R/GetCovariatesFromOtherCohorts.R index 947efc9..dca900e 100644 --- a/R/GetCovariatesFromOtherCohorts.R +++ b/R/GetCovariatesFromOtherCohorts.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -22,6 +22,18 @@ #' @param covariateSettings An object of type \code{covariateSettings} as created using the #' \code{\link{createCohortBasedCovariateSettings}} or #' \code{\link{createCohortBasedTemporalCovariateSettings}} functions. +#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates +#' should be stored. If not provided, results will be fetched to R. +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will +#' be stored. If not provided, results will be fetched to R. 
The table can be +#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If +#' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. +#' +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. @@ -37,6 +49,12 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { @@ -153,6 +171,11 @@ getDbCohortBasedCovariatesData <- function(connection, cdmVersion = cdmVersion, rowIdField = rowIdField, covariateSettings = detailledSettings, + targetCovariateTable = targetCovariateTable, + targetCovariateContinuousTable = targetCovariateContinuousTable, + targetCovariateRefTable = targetCovariateRefTable, + targetAnalysisRefTable = targetAnalysisRefTable, + targetTimeRefTable = targetTimeRefTable, aggregated = aggregated, minCharacterizationMean = minCharacterizationMean ) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 86a09c8..849d227 100644 --- a/R/GetDefaultCovariates.R 
+++ b/R/GetDefaultCovariates.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -24,17 +24,19 @@ #' @param covariateSettings Either an object of type \code{covariateSettings} as created using one #' of the createCovariate functions, or a list of such objects. #' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates -#' should be stored. +#' should be stored. If not provided, results will be fetched to R. #' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will #' be stored. If not provided, results will be fetched to R. The table can be #' a permanent table in the \code{targetDatabaseSchema} or a temp table. If #' it is a temp table, do not specify \code{targetDatabaseSchema}. +#' @param targetCovariateContinuousTable (Optional) The name of the table where the resulting continuous covariates should be stored. #' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. +#' #' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' @param targetTimeRefTable (Optional) The name of the table for the time reference #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. 
-#' #' @template GetCovarParams #' #' @examples @@ -52,9 +54,7 @@ #' connection = connection, #' cdmDatabaseSchema = "main", #' cohortTable = "cohort", -#' covariateSettings = createDefaultCovariateSettings(), -#' targetDatabaseSchema = "main", -#' targetCovariateTable = "ut_cov" +#' covariateSettings = createDefaultCovariateSettings() #' ) #' } #' @export @@ -67,10 +67,12 @@ getDbDefaultCovariateData <- function(connection, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, - targetDatabaseSchema, - targetCovariateTable, - targetCovariateRefTable, - targetAnalysisRefTable, + targetDatabaseSchema = NULL, + targetCovariateTable = NULL, + targetCovariateContinuousTable = NULL, + targetCovariateRefTable = NULL, + targetAnalysisRefTable = NULL, + targetTimeRefTable = NULL, aggregated = FALSE, minCharacterizationMean = 0, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { @@ -80,9 +82,7 @@ getDbDefaultCovariateData <- function(connection, if (cdmVersion == "4") { stop("Common Data Model version 4 is not supported") } - if (!missing(targetCovariateTable) && !is.null(targetCovariateTable) && aggregated) { - stop("Writing aggregated results to database is currently not supported") - } + if (!missing(cohortId)) { warning("cohortId argument has been deprecated, please use cohortIds") cohortIds <- cohortId @@ -99,6 +99,19 @@ getDbDefaultCovariateData <- function(connection, checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, upper = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) + + targetTables <- list( + covariates = targetCovariateTable, + covariatesContinuous = targetCovariateContinuousTable, + covariateRef = targetCovariateRefTable, + analysisRef = targetAnalysisRefTable, + timeRef = targetTimeRefTable + ) + # Is the target schema missing or are all the specified tables temp + allTempTables <- all(substr(targetTables, 1, 1) == "#") + extractToAndromeda <- is.null(targetCovariateTable) + + settings 
<- .toJson(covariateSettings) rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql( @@ -129,18 +142,20 @@ getDbDefaultCovariateData <- function(connection, profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { - ParallelLogger::logInfo("Fetching data from server") - start <- Sys.time() - # Binary or non-aggregated features - covariateData <- Andromeda::andromeda() - if (!is.null(todo$sqlQueryFeatures)) { + # Now we extract the results into Andromeda tables or as tables + ParallelLogger::logInfo("Fetching data from server") + start <- Sys.time() + covariateData <- Andromeda::andromeda() + + # Binary or non-aggregated features + if (!is.null(todo$sqlQueryFeatures)) { + # extracting covariate table + if (extractToAndromeda) { sql <- SqlRender::translate( sql = todo$sqlQueryFeatures, targetDialect = attr(connection, "dbms"), tempEmulationSchema = tempEmulationSchema ) - DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, @@ -148,10 +163,52 @@ getDbDefaultCovariateData <- function(connection, andromedaTableName = "covariates", snakeCaseToCamelCase = TRUE ) + } else { + # for testing to see column order + # print(todo$sqlQueryFeatures) + + sql <- " + INSERT INTO @target_covariate_table( + + {@temporal | @temporal_sequence} ? 
{time_id,} + + {@aggregated}?{ + cohort_definition_id, + covariate_id, + sum_value, + average_value + }:{ + covariate_id, + row_id, + covariate_value + } + + ) @sub_query; " + + sql <- SqlRender::render( + sql = sql, + target_covariate_table = targetTables$covariates, + sub_query = gsub(";", "", todo$sqlQueryFeatures), + temporal = covariateSettings$temporal, + temporal_sequence = covariateSettings$temporalSequence, + aggregated = aggregated + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) } + } - # Continuous aggregated features - if (!is.null(todo$sqlQueryContinuousFeatures)) { + # Continuous aggregated features + if (!is.null(todo$sqlQueryContinuousFeatures)) { + if (extractToAndromeda) { sql <- SqlRender::translate( sql = todo$sqlQueryContinuousFeatures, targetDialect = attr(connection, "dbms"), @@ -164,50 +221,62 @@ getDbDefaultCovariateData <- function(connection, andromedaTableName = "covariatesContinuous", snakeCaseToCamelCase = TRUE ) - } + } else { + sql <- " + INSERT INTO @target_covariate_continuous_table( + {@aggregated}?{ - # Covariate reference - sql <- SqlRender::translate( - sql = todo$sqlQueryFeatureRef, - targetDialect = attr(connection, "dbms"), - tempEmulationSchema = tempEmulationSchema - ) + cohort_definition_id, + covariate_id, + {@temporal | @temporal_sequence} ? 
{time_id,} + count_value, + min_value, + max_value, + average_value, + standard_deviation, + median_value, + p10_value, + p25_value, + p75_value, + p90_value - DatabaseConnector::querySqlToAndromeda( - connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariateRef", - snakeCaseToCamelCase = TRUE - ) - collisions <- covariateData$covariateRef %>% - filter(collisions > 0) %>% - collect() - if (nrow(collisions) > 0) { - warning(sprintf( - "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s", - paste(collisions$covariateId, paste = ", ") - )) - } + }:{ - # Analysis reference - sql <- SqlRender::translate( - sql = todo$sqlQueryAnalysisRef, - targetDialect = attr(connection, "dbms"), - tempEmulationSchema = tempEmulationSchema - ) - DatabaseConnector::querySqlToAndromeda( - connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "analysisRef", - snakeCaseToCamelCase = TRUE - ) + covariate_id, + {@temporal | @temporal_sequence} ? 
{time_id,} + row_id, + covariate_value + + } + + ) @sub_query;" + + sql <- SqlRender::render( + sql = sql, + target_covariate_continuous_table = targetTables$covariatesContinuous, + sub_query = gsub(";", "", todo$sqlQueryContinuousFeatures), + temporal = covariateSettings$temporal, + temporal_sequence = covariateSettings$temporalSequence, + aggregated = aggregated + ) - # Time reference - if (!is.null(todo$sqlQueryTimeRef)) { sql <- SqlRender::translate( - sql = todo$sqlQueryTimeRef, + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } + + # Covariate reference + if (!is.null(todo$sqlQueryFeatureRef)) { + if (extractToAndromeda) { + sql <- SqlRender::translate( + sql = todo$sqlQueryFeatureRef, targetDialect = attr(connection, "dbms"), tempEmulationSchema = tempEmulationSchema ) @@ -215,62 +284,145 @@ getDbDefaultCovariateData <- function(connection, connection = connection, sql = sql, andromeda = covariateData, - andromedaTableName = "timeRef", + andromedaTableName = "covariateRef", snakeCaseToCamelCase = TRUE ) - } + collisions <- covariateData$covariateRef %>% + dplyr::filter(collisions > 0) %>% + dplyr::collect() - delta <- Sys.time() - start - ParallelLogger::logInfo("Fetching data took ", signif(delta, 3), " ", attr(delta, "units")) - } else { - # Don't fetch to R , but create on server instead - ParallelLogger::logInfo("Writing data to table") - start <- Sys.time() - convertQuery <- function(sql, databaseSchema, table) { - if (missing(databaseSchema) || is.null(databaseSchema)) { - tableName <- table - } else { - tableName <- paste(databaseSchema, table, sep = ".") + if (nrow(collisions) > 0) { + warning(sprintf( + "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s", + paste(collisions$covariateId, collapse = ", ") + )) } - return(sub("FROM", paste("INTO", tableName, "FROM"), 
sql)) - } + } else { + sql <- " + INSERT INTO @target_covariate_ref_table( + covariate_id, + covariate_name, + analysis_id, + concept_id, + value_as_concept_id, + collisions + ) @sub_query ;" + + sql <- SqlRender::render( + sql = sql, + target_covariate_ref_table = targetTables$covariateRef, + sub_query = gsub(";", "", todo$sqlQueryFeatureRef) + ) - # Covariates - if (!is.null(todo$sqlQueryFeatures)) { - sql <- convertQuery(todo$sqlQueryFeatures, targetDatabaseSchema, targetCovariateTable) sql <- SqlRender::translate( sql = sql, - targetDialect = attr(connection, "dbms"), + targetDialect = DatabaseConnector::dbms(connection), tempEmulationSchema = tempEmulationSchema ) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) } + } + - # Covariate reference - if (!missing(targetCovariateRefTable) && !is.null(targetCovariateRefTable)) { - sql <- convertQuery(todo$sqlQueryFeatureRef, targetDatabaseSchema, targetCovariateRefTable) + # Analysis reference + if (!is.null(todo$sqlQueryAnalysisRef)) { + if (extractToAndromeda) { sql <- SqlRender::translate( - sql = sql, + sql = todo$sqlQueryAnalysisRef, targetDialect = attr(connection, "dbms"), tempEmulationSchema = tempEmulationSchema ) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) - } + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "analysisRef", + snakeCaseToCamelCase = TRUE + ) + } else { + sql <- " + INSERT INTO @target_analysis_ref_table( + analysis_id, + analysis_name, + domain_id, + {!@temporal} ? 
{ + start_day, + end_day, + } + is_binary, + missing_means_zero + ) @sub_query ;" + + sql <- SqlRender::render( + sql = sql, + target_analysis_ref_table = targetTables$analysisRef, + sub_query = gsub(";", "", todo$sqlQueryAnalysisRef), + temporal = covariateSettings$temporal | covariateSettings$temporalSequence + ) - # Analysis reference - if (!missing(targetAnalysisRefTable) && !is.null(targetAnalysisRefTable)) { - sql <- convertQuery(todo$sqlQueryAnalysisRef, targetDatabaseSchema, targetAnalysisRefTable) sql <- SqlRender::translate( sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) + } + } + + + # Time reference + if (!is.null(todo$sqlQueryTimeRef)) { + if (extractToAndromeda) { + sql <- SqlRender::translate( + sql = todo$sqlQueryTimeRef, targetDialect = attr(connection, "dbms"), tempEmulationSchema = tempEmulationSchema ) - DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) + DatabaseConnector::querySqlToAndromeda( + connection = connection, + sql = sql, + andromeda = covariateData, + andromedaTableName = "timeRef", + snakeCaseToCamelCase = TRUE + ) + } else { + # TODO - what columns are in time ref table?! 
+ sql <- " + INSERT INTO @target_time_ref_table( + time_part, + time_interval, + sequence_start_day, + sequence_end_day + ) @sub_query;" + + sql <- SqlRender::render( + sql = sql, + target_time_ref_table = targetTables$timeRef, + sub_query = gsub(";", "", todo$sqlQueryTimeRef) + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = DatabaseConnector::dbms(connection), + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::executeSql( + connection = connection, + sql = sql + ) } - delta <- Sys.time() - start - ParallelLogger::logInfo("Writing data took", signif(delta, 3), " ", attr(delta, "units")) } + + delta <- Sys.time() - start + ParallelLogger::logInfo("Fetching data took ", signif(delta, 3), " ", attr(delta, "units")) + # Drop temp tables sql <- SqlRender::translate( sql = todo$sqlCleanup, @@ -291,7 +443,7 @@ getDbDefaultCovariateData <- function(connection, } } - if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { + if (extractToAndromeda) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { warning("No data found, probably because no covariates were specified.") @@ -304,5 +456,7 @@ getDbDefaultCovariateData <- function(connection, class(covariateData) <- "CovariateData" attr(class(covariateData), "package") <- "FeatureExtraction" return(covariateData) + } else { + return(invisible(NULL)) } } diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 1979107..b05da06 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/Normalization.R b/R/Normalization.R index 45cace8..c0c6896 100644 --- a/R/Normalization.R +++ b/R/Normalization.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics 
+# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/Table1.R b/R/Table1.R index d1dab25..fb963a0 100644 --- a/R/Table1.R +++ b/R/Table1.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/R/UnitTestHelperFunctions.R b/R/UnitTestHelperFunctions.R index 205b598..bdf4499 100644 --- a/R/UnitTestHelperFunctions.R +++ b/R/UnitTestHelperFunctions.R @@ -1,4 +1,4 @@ -# Copyright 2025 Observational Health Data Sciences and Informatics +# Copyright 2026 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -59,6 +59,7 @@ #' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This #' will help reduce the file size of the characterization output, but will remove information #' on covariates that have very low values. The default is 0. +#' @param ... Additional arguments, not used. #' @return #' Returns an object of type \code{covariateData}, containing information on the covariates. #' @@ -71,7 +72,8 @@ rowIdField = "subject_id", covariateSettings, aggregated = FALSE, - minCharacterizationMean = 0) { + minCharacterizationMean = 0, + ...) { writeLines("Constructing length of observation covariates") if (covariateSettings$useLengthOfObs == FALSE) { return(NULL) diff --git a/docs/404.html b/docs/404.html index 69a032d..ee1f49a 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,5 +1,5 @@ - +
@@ -32,7 +32,7 @@ @@ -118,7 +118,7 @@vignettes/CreatingCovariatesBasedOnOtherCohorts.Rmd
CreatingCovariatesBasedOnOtherCohorts.RmdSite built with pkgdown 2.1.0.
+Site built with pkgdown 2.2.0.
diff --git a/docs/articles/CreatingCovariatesUsingCohortAttributes.html b/docs/articles/CreatingCovariatesUsingCohortAttributes.html index f544dcb..57aef7b 100644 --- a/docs/articles/CreatingCovariatesUsingCohortAttributes.html +++ b/docs/articles/CreatingCovariatesUsingCohortAttributes.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ @@ -99,8 +99,8 @@vignettes/CreatingCovariatesUsingCohortAttributes.Rmd
CreatingCovariatesUsingCohortAttributes.RmdSite built with pkgdown 2.1.0.
+Site built with pkgdown 2.2.0.
diff --git a/docs/articles/CreatingCustomCovariateBuilders.html b/docs/articles/CreatingCustomCovariateBuilders.html index 61e963f..820a796 100644 --- a/docs/articles/CreatingCustomCovariateBuilders.html +++ b/docs/articles/CreatingCustomCovariateBuilders.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ @@ -99,8 +99,8 @@vignettes/CreatingCustomCovariateBuilders.Rmd
CreatingCustomCovariateBuilders.RmdSite built with pkgdown 2.1.0.
+Site built with pkgdown 2.2.0.
diff --git a/docs/articles/CreatingCustomCovariateBuildersKorean.html b/docs/articles/CreatingCustomCovariateBuildersKorean.html index b38a499..ab9ea82 100644 --- a/docs/articles/CreatingCustomCovariateBuildersKorean.html +++ b/docs/articles/CreatingCustomCovariateBuildersKorean.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ @@ -99,8 +99,8 @@vignettes/CreatingCustomCovariateBuildersKorean.Rmd
CreatingCustomCovariateBuildersKorean.Rmdvignettes/UsingFeatureExtraction.Rmd
UsingFeatureExtraction.RmdSite built with pkgdown 2.1.0.
+Site built with pkgdown 2.2.0.
diff --git a/docs/articles/UsingFeatureExtractionKorean.html b/docs/articles/UsingFeatureExtractionKorean.html index 8829312..2d07583 100644 --- a/docs/articles/UsingFeatureExtractionKorean.html +++ b/docs/articles/UsingFeatureExtractionKorean.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ @@ -99,8 +99,8 @@vignettes/UsingFeatureExtractionKorean.Rmd
UsingFeatureExtractionKorean.RmdSite built with pkgdown 2.1.0.
+Site built with pkgdown 2.2.0.
diff --git a/docs/articles/index.html b/docs/articles/index.html index af982be..9a22c66 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,5 +1,5 @@ -Schuemie M, Suchard M, Ryan P, Reps J, Sena A, Inberg G (2025). +
Schuemie M, Suchard M, Ryan P, Reps J, Sena A, Inberg G (2026). FeatureExtraction: Generating Features for a Cohort. -R package version 3.12.0, https://github.com/OHDSI/FeatureExtraction. +R package version 3.13.0, https://github.com/OHDSI/FeatureExtraction.
@Manual{,
title = {FeatureExtraction: Generating Features for a Cohort},
author = {Martijn Schuemie and Marc Suchard and Patrick Ryan and Jenna Reps and Anthony Sena and Ger Inberg},
- year = {2025},
- note = {R package version 3.12.0},
+ year = {2026},
+ note = {R package version 3.13.0},
url = {https://github.com/OHDSI/FeatureExtraction},
}
@@ -136,7 +136,7 @@ New Features:
+getDbCovariateData in the database and added ability to control all target tables with new target*Table parameters (#152, #321)Bugfixes:
+getDbDefaultCovariateData works and is consistentAdditional arguments, not used.
# \donttest{
-connectionDetails <- Eunomia::getEunomiaConnectionDetails()
-Eunomia::createCohorts(
+connectionDetails <- Eunomia::getEunomiaConnectionDetails()
+Eunomia::createCohorts(
connectionDetails = connectionDetails,
cdmDatabaseSchema = "main",
cohortDatabaseSchema = "main",
@@ -237,8 +237,8 @@ Examples
aggregated = FALSE
)
#> Constructing covariates from cohort attributes table
-#> Inserting data took 0.00588 secs
-#> Loading took 0.0269 secs
+#> Inserting data took 0.0172 secs
+#> Loading took 0.0838 secs
# }
createCohortBasedTemporalCovariateSettings functions.
+(Optional) The name of the database schema where the resulting covariates +should be stored. If not provided, results will be fetched to R.
(Optional) The name of the table where the resulting covariates will
+be stored. If not provided, results will be fetched to R. The table can be
+a permanent table in the targetDatabaseSchema or a temp table. If
+it is a temp table, do not specify targetDatabaseSchema.
(Optional) The name of the table where the resulting continuous covariates should be stored.
(Optional) The name of the table where the covariate reference will be stored.
(Optional) The name of the table where the analysis reference will be stored.
(Optional) The name of the table for the time reference
Should aggregate statistics be computed instead of covariates per cohort entry?
Whether to export to a table rather than an Andromeda object

Run sql to create table? Code does not check if table exists.
If targetDatabaseSchema, drop any existing tables. Otherwise, results are merged +into existing table data. Overrides createTable.

(Optional) The name of the database schema where the resulting covariates +should be stored as a table. If not provided, results will be fetched to R.
(Optional) The name of the table where the resulting covariates will
+be stored. If not provided, results will be fetched to R. The table can be
+a permanent table in the targetDatabaseSchema or a temp table. If
+it is a temp table, do not specify targetDatabaseSchema.
(Optional) The name of the table where the resulting continuous covariates will
+be stored. If not provided, results will be fetched to R. The table can be
+a permanent table in the targetDatabaseSchema or a temp table. If
+it is a temp table, do not specify targetDatabaseSchema.
(Optional) The name of the table where the covariate reference will be stored. If
+it is a temp table, do not specify targetDatabaseSchema.
(Optional) The name of the table where the analysis reference will be stored. If
+it is a temp table, do not specify targetDatabaseSchema.
(Optional) The name of the table for the time reference. If
+it is a temp table, do not specify targetDatabaseSchema.
Should aggregate statistics be computed instead of covariates per cohort entry? If aggregated is set to FALSE, the results returned will be based @@ -218,9 +274,9 @@
# \donttest{
-eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails()
+eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails()
covSettings <- createDefaultCovariateSettings()
-Eunomia::createCohorts(
+Eunomia::createCohorts(
connectionDetails = eunomiaConnectionDetails,
cdmDatabaseSchema = "main",
cohortDatabaseSchema = "main",
@@ -258,9 +314,9 @@ Examples
#> Connecting using SQLite driver
#> Constructing features on server
#> | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | 
|=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | 
|===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | 
|================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
-#> Executing SQL took 1.41 secs
+#> Executing SQL took 2.06 secs
#> Fetching data from server
-#> Fetching data took 0.158 secs
+#> Fetching data took 0.671 secs
# }
Covariate Data
Aggregate covariate data
convertPrespecSettingsToDetailedSettings()
Convert prespecified covariate settings into detailed covariate settings
Covariate Data