Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions R/CDMInterface.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,10 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda

for (tableName in cohortTableName) {
cdm$tp_temp_tbl <- cdm[[tableName]] %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::mutate(
subject_id_origin = .data$subject_id
) %>%
dplyr::filter(.data$cohort_definition_id %in% cohortIds) %>%
dplyr::filter(!!CDMConnector::datediff("cohort_start_date", "cohort_end_date", interval = "day") >= minEraDuration) %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::ungroup() %>%
dplyr::inner_join(
cdm$person,
by = dplyr::join_by(subject_id_origin == person_id)
by = dplyr::join_by(subject_id == person_id)
) %>%
dplyr::inner_join(
cdm$concept,
Expand All @@ -79,9 +72,6 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda
date_of_birth = as.Date(paste0(as.character(.data$year_of_birth), "-01-01"))) %>%
dplyr::mutate(
age = !!CDMConnector::datediff("date_of_birth", "cohort_start_date", interval = "year")) %>%
dplyr::mutate(
subject_id_origin = as.character(.data$subject_id_origin)
) %>%
dplyr::rename(sex = "concept_name") %>%
dplyr::mutate(
temp_date = as.Date("1970-01-01")
Expand All @@ -93,7 +83,6 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda
dplyr::select(
"cohort_definition_id",
"subject_id",
"subject_id_origin",
"cohort_start_date",
"cohort_end_date",
"age",
Expand All @@ -113,33 +102,50 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda

cdm <- CDMConnector::dropSourceTable(cdm = cdm, name = "tp_temp_tbl")

# Filter by minEraDuration first (dates are integers = days since epoch)
andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>%
dplyr::mutate(r = dplyr::row_number()) %>%
dplyr::group_by(.data$subject_id_origin) %>%
dplyr::mutate(
subject_id = as.integer(min(.data$r, na.rm = TRUE))
) %>%
dplyr::select(-"r")
dplyr::mutate(cohort_definition_id = as.numeric(.data$cohort_definition_id)) %>%
dplyr::filter(
(.data$cohort_end_date - .data$cohort_start_date) >= minEraDuration
)

# Attrition: after minEraDuration filter
n <- andromeda[[andromedaTableName]] %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::summarise(n = dplyr::n()) %>%
dplyr::pull()

appendAttrition(
toAdd = data.frame(
number_records = sum(n),
number_subjects = length(n),
reason_id = 2,
reason = sprintf("Removing records < minEraDuration (%s)", minEraDuration),
time_stamp = as.numeric(Sys.time())
),
andromeda = andromeda
)

# Filter to persons with at least one target cohort record
targetId <- as.numeric(targetCohortIds)

andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>%
dplyr::mutate(cohort_definition_id = as.numeric(.data$cohort_definition_id)) %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::filter(any(.data$cohort_definition_id %in% targetId, na.rm = TRUE)) %>%
dplyr::ungroup()


# Attrition: after target cohort filter
n <- andromeda[[andromedaTableName]] %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::summarise(n = dplyr::n()) %>%
dplyr::pull()

appendAttrition(
toAdd = data.frame(
number_records = sum(n),
number_subjects = length(n),
reason_id = 2,
reason = sprintf("Removing records < minEraDuration (%s)", minEraDuration),
reason_id = 3,
reason = "After filtering to persons with >=1 target cohort record",
time_stamp = as.numeric(Sys.time())
),
andromeda = andromeda
Expand Down
1 change: 0 additions & 1 deletion R/TreatmentPatterns-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ utils::globalVariables(
"duration_max",
"duration_median",
"event_name",
"subject_id_origin",
"person_id",
"concept_id",
"year_of_birth",
Expand Down
2 changes: 1 addition & 1 deletion R/computePathways.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ computePathways <- function(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 9,
reason_id = 11,
reason = sprintf("treatment construction done")
),
andromeda = andromeda
Expand Down
62 changes: 32 additions & 30 deletions R/constructPathways.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ constructPathways <- function(settings, andromeda) {
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 10,
reason_id = 12,
reason = sprintf("Max path length (%s)", settings$maxPathLength)
),
andromeda = andromeda
Expand Down Expand Up @@ -252,17 +252,37 @@ createTreatmentHistory <- function(
Andromeda::createIndex(andromeda$eventCohorts, c("personId", "startDate", "endDate"))
Andromeda::createIndex(andromeda$targetCohorts, c("personId", "indexDate", "endDate"))

andromeda[[sprintf("cohortTable_%s", targetCohortId)]] <- dplyr::full_join(
# Join events to targets with date constraints in the join to avoid cartesian products
andromeda[[sprintf("cohortTable_%s", targetCohortId)]] <- dplyr::inner_join(
x = andromeda$eventCohorts,
y = andromeda$targetCohorts,
by = dplyr::join_by(
personId == personId,
subject_id_origin == subject_id_origin,
y$indexDate <= x$startDate,
x$startDate <= y$endDate,
startDate >= indexDate,
startDate <= endDate
), suffix = c("Event", "Target")
)

# Attrition: subjects with events within window
nTargetSubjects <- andromeda$targetCohorts %>%
dplyr::distinct(.data$personId) %>%
dplyr::summarise(n = dplyr::n()) %>%
dplyr::pull()

attrCounts <- fetchAttritionCounts(andromeda, sprintf("cohortTable_%s", targetCohortId))
nSubjectsWithEvents <- attrCounts$nSubjects
nSubjectsWithoutEvents <- nTargetSubjects - nSubjectsWithEvents

appendAttrition(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 4,
reason = sprintf("After joining events within window (%s subjects without events)", nSubjectsWithoutEvents)
),
andromeda = andromeda
)

andromeda$treatmentHistory <- andromeda[[sprintf("cohortTable_%s", targetCohortId)]] %>%
dplyr::select(
"personId",
Expand All @@ -279,25 +299,7 @@ createTreatmentHistory <- function(
dplyr::mutate(
durationEra = .data$eventEndDate - .data$eventStartDate,
eventCohortId = as.character(as.integer(.data$eventCohortId))
) %>%
dplyr::filter(
!is.na(.data$indexYear),
!is.na(.data$eventCohortId)
)

attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory")
appendAttrition(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 3,
reason = sprintf(
"Removing events outside window (%s: %s | %s: %s)",
startAnchor, windowStart, endAnchor, windowEnd
)
),
andromeda = andromeda
)
return(invisible(NULL))
}

Expand Down Expand Up @@ -363,7 +365,7 @@ doSplitEventCohorts <- function(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 4,
reason_id = 6,
reason = sprintf("splitEventCohorts")
),
andromeda = andromeda
Expand Down Expand Up @@ -439,7 +441,7 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) {
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 5,
reason_id = 7,
reason = sprintf("Iteration %s: Collapsing eras, eraCollapse (%s)", counter, eraCollapseSize)
),
andromeda = andromeda
Expand All @@ -453,7 +455,7 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) {
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 5,
reason_id = 7,
reason = sprintf("No eras needed Collapsing, eraCollapse (%s)", eraCollapseSize)
),
andromeda = andromeda
Expand Down Expand Up @@ -525,7 +527,7 @@ doEraCollapse <- function(andromeda, eraCollapseSize) {
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 5,
reason_id = 7,
reason = sprintf("Collapsing eras, eraCollapse (%s)", eraCollapseSize)
),
andromeda = andromeda
Expand Down Expand Up @@ -806,7 +808,7 @@ doCombinationWindow <- function(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 6,
reason_id = 8,
reason = sprintf("Iteration %s: minPostCombinationDuration (%s), combinatinoWindow (%s)", iterations, minPostCombinationDuration, combinationWindow)
),
andromeda = andromeda
Expand All @@ -829,7 +831,7 @@ doCombinationWindow <- function(
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 7,
reason_id = 9,
reason = sprintf("After Combination")
),
andromeda = andromeda
Expand Down Expand Up @@ -996,7 +998,7 @@ doFilterTreatments <- function(andromeda, filterTreatments) {
toAdd = data.frame(
number_records = attrCounts$nRecords,
number_subjects = attrCounts$nSubjects,
reason_id = 8,
reason_id = 10,
reason = sprintf("filterTreatments (%s)", filterTreatments)
),
andromeda = andromeda
Expand Down
8 changes: 2 additions & 6 deletions R/exportPatientLevel.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
addOriginId <- function(table) {
andromeda <- table$src$con

andromeda$cohortTable %>%
dplyr::select("personId", "subject_id_origin") %>%
dplyr::inner_join(table, by = dplyr::join_by(personId == personId)) %>%
dplyr::distinct()
# personId already contains the original subject_id (no remapping)
table
}

writeTreatmentHistory <- function(andromeda, outputPath) {
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-exportPatientLevel.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ test_that("exportPatientLevel", {
attrition <- read.csv(file.path(tempdir(), "attrition.csv"))
cdm_source_info <- read.csv(file.path(tempdir(), "cdm_source_info.csv"))

expect_equal(ncol(treatment_history), 13)
expect_equal(ncol(treatment_history), 12)
expect_equal(nrow(treatment_history), 553)

expect_equal(ncol(metadata), 5)
expect_equal(nrow(metadata), 1)

expect_equal(ncol(attrition), 5)
expect_equal(nrow(attrition), 11)
expect_equal(nrow(attrition), 12)

expect_equal(ncol(cdm_source_info), 10)
expect_equal(nrow(cdm_source_info), 1)
Expand Down
Loading