forked from mi-erasmusmc/TreatmentPatterns
-
Notifications
You must be signed in to change notification settings - Fork 5
Person Id is changed by computePathways #358
Copy link
Copy link
Open
Description
Describe the bug
computePathways returns person-level results (treatmentHistory), but the person IDs are not preserved, which makes it difficult to debug test cases.
To Reproduce
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.5.2
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(CDMConnector)
# Test fixture: a single patient serialized as JSON, one array per table.
# It holds one person (person_id 2), one observation period
# (2014-01-01 to 2024-01-01), one drug exposure (concept 1311078, source
# value "cytarabine") and one condition occurrence (concept 140352,
# "Acute myeloid leukemia, disease"). The remaining tables are empty.
# The person_id is 2, not 1, which is what makes the renumbering reported
# in this issue visible in the final output.
json <- c('
{
"person": [
{
"person_id": 2,
"gender_concept_id": 8532,
"year_of_birth": 1970,
"month_of_birth": 4,
"day_of_birth": 6,
"birth_datetime": "1970-04-06",
"race_concept_id": 0,
"ethnicity_concept_id": 0,
"location_id": 0,
"provider_id": 0,
"care_site_id": 0
}
],
"observation_period": [
{
"observation_period_id": 2,
"person_id": 2,
"observation_period_start_date": "2014-01-01",
"observation_period_end_date": "2024-01-01",
"period_type_concept_id": 32817
}
],
"visit_occurrence": [],
"visit_detail": [],
"drug_exposure": [
{
"drug_exposure_id": 1,
"person_id": 2,
"drug_concept_id": 1311078,
"drug_exposure_start_date": "2017-11-08",
"drug_exposure_start_datetime": "2017-11-08",
"drug_exposure_end_date": "2017-11-09",
"drug_exposure_end_datetime": "2017-11-09",
"drug_type_concept_id": 38000177,
"drug_source_value": "cytarabine"
}
],
"condition_occurrence": [
{
"condition_occurrence_id": 1,
"person_id": 2,
"condition_concept_id": 140352,
"condition_start_date": "2016-05-05",
"condition_start_datetime": "2016-05-05",
"condition_end_date": "2023-07-20",
"condition_end_datetime": "2023-07-20",
"condition_type_concept_id": 32879,
"provider_id": 0,
"condition_source_value": "Acute myeloid leukemia, disease"
}
],
"procedure_occurrence": [],
"measurement": []
}
')
# Persist the fixture as "aml_tp.json" in the session temp directory, the
# same directory and base name handed to patientsCDM() on the next line.
readr::write_file(json, file.path(tempdir(), "aml_tp.json"))
# NOTE(review): presumably loads the JSON patients into a blank CDM (the
# status output below says so); confirm the argument meanings against the
# TestGenerator documentation.
cdm <- TestGenerator::patientsCDM(tempdir(), "aml_tp", "5.4", "TP_test5_cdm")
#> ✖ Standard table(s) in test data: person, observation_period, visit_occurrence, visit_detail, drug_exposure, condition_occurrence, procedure_occurrence and measurement
#> ✔ Patients pushed to blank CDM successfully
# Create the cohort table "cohort" from two named concept sets:
# aml (140352) and cytarabine (1311078).
cdm <- generateConceptCohortSet(cdm, conceptSet = list(aml = 140352, cytarabine = 1311078), name = "cohort")
# Show the per-cohort record/subject counts next to the cohort settings.
# No `by` is given, so the join key is chosen by dplyr and reported in the
# message that follows.
cohortCount(cdm$cohort) %>%
left_join(settings(cdm$cohort))
#> Joining with `by = join_by(cohort_definition_id)`
#> # A tibble: 2 × 8
#> cohort_definition_id number_records number_subjects cohort_name limit
#> <int> <int> <int> <chr> <chr>
#> 1 1 1 1 aml first
#> 2 2 1 1 cytarabine first
#> # ℹ 3 more variables: prior_observation <dbl>, future_observation <dbl>,
#> # end <chr>
# Print the generated cohort table. Both rows carry subject_id 2, i.e. the
# person_id from the fixture is still intact at this point.
cdm$cohort
#> # Source: table<cohort> [?? x 4]
#> # Database: DuckDB 1.4.4 [root@Darwin 25.2.0:R 4.5.1//private/var/folders/2j/8z0yfn1j69q8sxjc7vj9yhz40000gp/T/RtmpUleSLV/filefe693034dcde.duckdb]
#> cohort_definition_id subject_id cohort_start_date cohort_end_date
#> <int> <int> <date> <date>
#> 1 1 2 2016-05-05 2024-01-01
#> 2 2 2 2017-11-08 2024-01-01
# Build the cohorts table passed to computePathways(): keep the cohort id
# and name under the column names cohortId / cohortName, then add a `type`
# column that is "target" when the name contains "aml" and "event"
# otherwise (so cytarabine becomes the event cohort).
cohortSet <- settings(cdm$cohort) |>
select(
cohortId = "cohort_definition_id",
cohortName = "cohort_name"
) |>
mutate(type = case_when(
stringr::str_detect(cohortName, "aml") ~ "target",
TRUE ~ "event"
))
# Run the pathway computation on the "cohort" table of `cdm`, using the
# target/event assignment from `cohortSet`. The argument values below are
# the exact settings under which the person id renumbering was observed.
# NOTE(review): the result is later closed with Andromeda::close(), so it
# is presumably an Andromeda object; confirm against the TreatmentPatterns
# documentation.
andr <- TreatmentPatterns::computePathways(
cohorts = cohortSet,
cohortTableName = "cohort",
cdm = cdm,
analysisId = 1,
description = "",
tempEmulationSchema = NULL,
startAnchor = "startDate",
windowStart = 0,
endAnchor = "endDate",
windowEnd = 0,
minEraDuration = 1,
splitEventCohorts = NULL,
splitTime = NULL,
eraCollapseSize = 30,
combinationWindow = 7,
minPostCombinationDuration = 14,
filterTreatments = "First",
maxPathLength = 10,
overlapMethod = "truncate",
concatTargets = TRUE
)
#> -- Qualifying records for cohort definitions: 1, 2
#> Records: 2
#> Subjects: 1
#> -- Removing records < minEraDuration (1)
#> Records: 2
#> Subjects: 1
#> >> Starting on target: 1 (aml)
#> -- Removing events outside window (startDate: 0 | endDate: 0)
#> Records: 1
#> Subjects: 1
#> -- splitEventCohorts
#> Records: 1
#> Subjects: 1
#> -- No eras needed Collapsing, eraCollapse (30)
#> Records: 1
#> Subjects: 1
#> -- After Combination
#> Records: 1
#> Subjects: 1
#> -- filterTreatments (First)
#> Records: 1
#> Subjects: 1
#> -- Max path length (10)
#> Records: 1
#> Subjects: 1
#> -- treatment construction done
#> Records: 1
#> Subjects: 1
# Pull the person-level result into memory. This is where the bug shows:
# the single row reports personId 1, while the input cohort rows had
# subject_id 2.
andr$treatmentHistory %>% dplyr::collect()
#> # A tibble: 1 × 14
#> eventCohortId personId indexYear eventStartDate eventEndDate type age sex
#> <chr> <int> <int> <int> <int> <chr> <dbl> <chr>
#> 1 2 1 2016 17478 19723 event 47 FEMA…
#> # ℹ 6 more variables: targetCohortId <dbl>, n_target <dbl>, durationEra <int>,
#> # sortOrder <dbl>, eventSeq <int>, eventCohortName <chr>
# Release the connection behind the test CDM.
cdmDisconnect(cdm)
Andromeda::close(andr)

Created on 2026-02-17 with reprex v2.1.1
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.5.1 (2025-06-13)
#> os macOS Tahoe 26.2
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz UTC
#> date 2026-02-17
#> pandoc 3.6.3 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> quarto 1.8.27 @ /usr/local/bin/quarto
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> Andromeda 0.6.3 2023-03-26 [1] CRAN (R 4.5.1)
#> arrow 22.0.0.1 2025-12-23 [1] CRAN (R 4.5.2)
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.5.0)
#> backports 1.5.0 2024-05-23 [1] CRAN (R 4.5.0)
#> bit 4.6.0 2025-03-06 [1] CRAN (R 4.5.0)
#> bit64 4.6.0-1 2025-01-16 [1] CRAN (R 4.5.0)
#> blob 1.3.0 2026-01-14 [1] CRAN (R 4.5.2)
#> brio 1.1.5 2024-04-24 [1] CRAN (R 4.5.0)
#> cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.0)
#> CDMConnector * 2.4.0 2026-02-16 [1] local
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.5.0)
#> checkmate 2.3.4 2026-02-03 [1] CRAN (R 4.5.2)
#> cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
#> DBI 1.2.3 2024-06-02 [1] CRAN (R 4.5.0)
#> dbplyr 2.5.2 2026-02-13 [1] CRAN (R 4.5.2)
#> digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
#> dplyr * 1.2.0 2026-02-03 [1] CRAN (R 4.5.2)
#> duckdb 1.4.4 2026-01-28 [1] CRAN (R 4.5.2)
#> evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.0)
#> farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
#> fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
#> generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
#> ggplot2 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
#> gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
#> hms 1.1.4 2025-10-17 [1] CRAN (R 4.5.0)
#> htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
#> jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
#> knitr 1.51 2025-12-20 [1] CRAN (R 4.5.2)
#> lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
#> magrittr 2.0.4 2025-09-12 [1] CRAN (R 4.5.0)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.5.0)
#> omopgenerics 1.3.6 2026-01-28 [1] CRAN (R 4.5.2)
#> openxlsx 4.2.8.1 2025-10-31 [1] CRAN (R 4.5.0)
#> otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.0)
#> pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
#> purrr 1.2.1 2026-01-09 [1] CRAN (R 4.5.2)
#> R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
#> RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
#> Rcpp 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
#> readr 2.1.6 2025-11-14 [1] CRAN (R 4.5.2)
#> readxl 1.4.5 2025-03-07 [1] CRAN (R 4.5.0)
#> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.5.0)
#> rJava 1.0-11 2024-01-26 [1] CRAN (R 4.5.0)
#> rlang 1.1.7 2026-01-09 [1] CRAN (R 4.5.2)
#> rmarkdown 2.30 2025-09-28 [1] CRAN (R 4.5.0)
#> RSQLite 2.4.4 2025-11-10 [1] CRAN (R 4.5.0)
#> rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
#> S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
#> scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
#> sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
#> snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.5.0)
#> stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
#> stringr 1.6.0 2025-11-04 [1] CRAN (R 4.5.0)
#> TestGenerator 0.5.0 2026-01-14 [1] CRAN (R 4.5.2)
#> testthat 3.3.2 2026-01-11 [1] CRAN (R 4.5.2)
#> tibble 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
#> tidyr 1.3.2 2025-12-19 [1] CRAN (R 4.5.2)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
#> TreatmentPatterns 3.1.2 2026-02-16 [1] local
#> tzdb 0.5.0 2025-03-15 [1] CRAN (R 4.5.0)
#> utf8 1.2.6 2025-06-08 [1] CRAN (R 4.5.0)
#> vctrs 0.7.1 2026-01-23 [1] CRAN (R 4.5.2)
#> withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
#> xfun 0.56 2026-01-18 [1] CRAN (R 4.5.2)
#> yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
#> zip 2.3.3 2025-05-13 [1] CRAN (R 4.5.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
#> * ── Packages attached to the search path.
#>
#> ──────────────────────────────────────────────────────────────────────────────

Notice that the person ID in this example was changed from 2 to 1.
Expected behavior
The person ID in the output should match the `subject_id` in the input cohorts.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels