From cc460880b59482416a7177de3ab5794d6964201f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=BAria=20Mercad=C3=A9-Besora?= <61558739+nmercadeb@users.noreply.github.com> Date: Mon, 20 Jan 2025 11:18:12 +0000 Subject: [PATCH] add intersect codes to codelist #410 --- R/requireCohortIntersect.R | 12 +- R/requireConceptIntersect.R | 26 +- tests/testthat/test-requireCohortIntersect.R | 239 +++++++++++------- tests/testthat/test-requireConceptIntersect.R | 80 ++++-- 4 files changed, 238 insertions(+), 119 deletions(-) diff --git a/R/requireCohortIntersect.R b/R/requireCohortIntersect.R index 41fb416..b5c8174 100644 --- a/R/requireCohortIntersect.R +++ b/R/requireCohortIntersect.R @@ -138,11 +138,21 @@ requireCohortIntersect <- function(cohort, reason <- glue::glue("{reason}, censoring at {censorDate}") } + # codelist + targetCodelist <- attr(cdm[[targetCohortTable]], "cohort_codelist") |> + dplyr::filter(.data$cohort_definition_id %in% .env$targetCohortId) |> + dplyr::collect() + newCodelist <- getIntersectionCodelist( + cohort, cohortId, targetCodelist + ) + # add additional columns x <- cohort |> dplyr::inner_join(subsetCohort, by = c(cols)) |> dplyr::compute(name = name, temporary = FALSE) |> - omopgenerics::newCohortTable(.softValidation = TRUE) |> + omopgenerics::newCohortTable( + .softValidation = TRUE, cohortCodelistRef = newCodelist + ) |> omopgenerics::recordCohortAttrition(reason = reason, cohortId = cohortId) omopgenerics::dropTable(cdm = cdm, name = subsetName) diff --git a/R/requireConceptIntersect.R b/R/requireConceptIntersect.R index cd78e6f..44b3a3e 100644 --- a/R/requireConceptIntersect.R +++ b/R/requireConceptIntersect.R @@ -132,10 +132,18 @@ requireConceptIntersect <- function(cohort, reason <- glue::glue("{reason}, censoring at {censorDate}") } + #codelist + newCodelist <- getIntersectionCodelist( + cohort, cohortId, conceptSetToCohortCodelist(conceptSet) + ) + + # cohort cohort <- cohort |> dplyr::inner_join(subsetCohort, by = c(cols)) |> dplyr::compute(name = name, temporary = FALSE) |> - omopgenerics::newCohortTable(.softValidation = TRUE) |> + omopgenerics::newCohortTable( + .softValidation = TRUE, cohortCodelistRef = newCodelist + ) |> omopgenerics::recordCohortAttrition(reason = reason, cohortId = cohortId) omopgenerics::dropTable(cdm = cdm, name = subsetName) @@ -151,3 +159,19 @@ requireConceptIntersect <- function(cohort, return(cohort) } + +getIntersectionCodelist <- function(cohort, cohortId, codelist) { + criteria <- "inclusion criteria" + intersectCodelist <- lapply( + as.list(cohortId), + function(x, tab = codelist) { + tab |> dplyr::mutate(cohort_definition_id = .env$x) + }) |> + dplyr::bind_rows() |> + dplyr::mutate(type = .env$criteria) + newCodelist <- attr(cohort, "cohort_codelist") |> + dplyr::collect() |> + dplyr::union(intersectCodelist) |> + dplyr::arrange(.data$cohort_definition_id) + return(newCodelist) +} diff --git a/tests/testthat/test-requireCohortIntersect.R b/tests/testthat/test-requireCohortIntersect.R index e40d6eb..1748850 100644 --- a/tests/testthat/test-requireCohortIntersect.R +++ b/tests/testthat/test-requireCohortIntersect.R @@ -9,10 +9,10 @@ test_that("requiring presence in another cohort", { start_cols <- colnames(cdm$cohort1) cdm$cohort3 <- requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 1, - window = c(-Inf, Inf), - name = "cohort3") + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf), + name = "cohort3") expect_identical(colnames(cdm$cohort3), colnames(cdm$cohort1)) expect_true(all(cdm$cohort3 |> @@ -32,10 +32,10 @@ test_that("requiring presence in another cohort", { "In cohort cohort_1 between -Inf & Inf days relative to cohort_start_date between 1 and Inf times"))) cdm$cohort4 <- requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 2, - window = list(c(-Inf, Inf)), - name = "cohort4") + targetCohortTable = "cohort2", + targetCohortId = 2, + window = list(c(-Inf, Inf)), + name = "cohort4") expect_true(all(cdm$cohort4 |> dplyr::distinct(subject_id) |> dplyr::pull() %in% @@ -54,9 +54,9 @@ test_that("requiring presence in another cohort", { # name cdm$cohort1 <- requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 2, - window = c(-Inf, Inf)) + targetCohortTable = "cohort2", + targetCohortId = 2, + window = c(-Inf, Inf)) expect_true(all(omopgenerics::attrition(cdm$cohort1)$reason == c("Initial qualifying events", "In cohort cohort_2 between -Inf & Inf days relative to cohort_start_date between 1 and Inf times", @@ -65,11 +65,11 @@ test_that("requiring presence in another cohort", { # censor date cdm$cohort5 <- requireCohortIntersect(cohort = cdm$cohort2, - targetCohortTable = "cohort1", - targetCohortId = 2, - window = c(0, Inf), - censorDate = "cohort_end_date", - name = "cohort5") + targetCohortTable = "cohort1", + targetCohortId = 2, + window = c(0, Inf), + censorDate = "cohort_end_date", + name = "cohort5") expect_true(all(cdm$cohort5 |> dplyr::pull("cohort_start_date") == c("2003-05-08", "2000-06-17", "2004-12-12"))) expect_true(all(cdm$cohort5 |> dplyr::pull("subject_id") == c("1", "1", "1"))) expect_true(all(cdm$cohort5 |> dplyr::pull("cohort_definition_id") == c("1", "2", "2"))) @@ -81,12 +81,12 @@ test_that("requiring presence in another cohort", { # cohort Id cdm$cohort6 <- requireCohortIntersect(cohort = cdm$cohort2, - cohortId = "cohort_2", - targetCohortTable = "cohort1", - targetCohortId = 1, - window = c(0, Inf), - censorDate = "cohort_end_date", - name = "cohort6") + cohortId = "cohort_2", + targetCohortTable = "cohort1", + targetCohortId = 1, + window = c(0, Inf), + censorDate = "cohort_end_date", + name = "cohort6") expect_true(all(cdm$cohort6 |> dplyr::pull("cohort_start_date") |> sort() == c("1999-07-11", "2000-01-11", "2000-05-28", "2000-06-17", "2003-05-08", "2004-12-12", "2015-01-25", "2015-02-02"))) @@ -99,12 +99,12 @@ test_that("requiring presence in another cohort", { cdm$cohort7 <- requireCohortIntersect(cohort = cdm$cohort2, intersections = c(0,0), - cohortId = 2, - targetCohortTable = "cohort1", - targetCohortId = 1, - window = c(0, Inf), - censorDate = "cohort_end_date", - name = "cohort7") + cohortId = 2, + targetCohortTable = "cohort1", + targetCohortId = 1, + window = c(0, Inf), + censorDate = "cohort_end_date", + name = "cohort7") expect_true(all(cdm$cohort7 |> dplyr::pull("cohort_start_date") |> sort() == c("2000-01-11", "2000-05-28", "2003-05-08", "2015-01-25"))) expect_true(all(cdm$cohort7 |> dplyr::pull("subject_id") |> sort() == c("1", "2", "2", "3"))) @@ -117,25 +117,25 @@ test_that("requiring presence in another cohort", { # expected errors # only support one target id at the moment expect_error(requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = c(1,2), - window = c(-Inf, Inf))) + targetCohortTable = "cohort2", + targetCohortId = c(1,2), + window = c(-Inf, Inf))) expect_error(requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort22", # does not exist - targetCohortId = 1, - window = c(-Inf, Inf))) + targetCohortTable = "cohort22", # does not exist + targetCohortId = 1, + window = c(-Inf, Inf))) expect_error(requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 10, # does not exist - window = c(-Inf, Inf))) + targetCohortTable = "cohort2", + targetCohortId = 10, # does not exist + window = c(-Inf, Inf))) expect_error(requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = NULL, # only one id supported - window = c(-Inf, Inf))) + targetCohortTable = "cohort2", + targetCohortId = NULL, # only one id supported + window = c(-Inf, Inf))) expect_error(requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = c("not_a_cohort", "lala"), - targetCohortId = 1, - window = c(-Inf, Inf))) + targetCohortTable = c("not_a_cohort", "lala"), + targetCohortId = 1, + window = c(-Inf, Inf))) PatientProfiles::mockDisconnect(cdm) }) @@ -150,16 +150,16 @@ test_that("requiring absence in another cohort", { cdm <- cdm_local |> copyCdm() cdm$cohort3_inclusion <- requireCohortIntersect(cohort = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 1, - window = c(-Inf, Inf), - name = "cohort3_inclusion") + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf), + name = "cohort3_inclusion") cdm$cohort3_exclusion <- requireCohortIntersect(cohort = cdm$cohort1, intersections = c(0, 0), - targetCohortTable = "cohort2", - targetCohortId = 1, - window = c(-Inf, Inf), - name = "cohort3_exclusion") + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf), + name = "cohort3_exclusion") in_both <- intersect(cdm$cohort3_inclusion |> dplyr::pull("subject_id") |> unique(), @@ -201,80 +201,80 @@ test_that("different intersection count requirements", { as.Date('2019-01-06')) ) cdm_local <- omock::mockCdmReference() |> omock::mockCdmFromTables(tables = list("cohort1" = cohort1, - "cohort2" = cohort2), seed = 1) + "cohort2" = cohort2), seed = 1) cdm <- cdm_local |> copyCdm() # no intersections - people not in cohort2 expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(0, 0), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), as.integer(c(4,5,6,7,8,9,10))) + requireCohortIntersect(intersections = c(0, 0), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), as.integer(c(4,5,6,7,8,9,10))) # only one intersection expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(1, 1), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(1L)) + requireCohortIntersect(intersections = c(1, 1), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(1L)) expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(1), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(1L)) + requireCohortIntersect(intersections = c(1), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(1L)) # 2 intersections expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(2, 2), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(2L)) + requireCohortIntersect(intersections = c(2, 2), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(2L)) expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(2), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(2L)) + requireCohortIntersect(intersections = c(2), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(2L)) # 2 or more intersections expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(2, Inf), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(2L, 3L)) + requireCohortIntersect(intersections = c(2, Inf), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(2L, 3L)) # 2 or 3 intersections expect_identical(sort(cdm$cohort1 |> - requireCohortIntersect(intersections = c(2, 3), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2", - name = "cohort1_test") |> - dplyr::pull("subject_id")), c(2L, 3L)) + requireCohortIntersect(intersections = c(2, 3), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2", + name = "cohort1_test") |> + dplyr::pull("subject_id")), c(2L, 3L)) # expected errors expect_error(requireCohortIntersect(cohort = cdm$cohort1, - intersections = c(-10, 10), - targetCohortId = 1, - window = c(-Inf, Inf), - targetCohortTable = "cohort2")) + intersections = c(-10, 10), + targetCohortId = 1, + window = c(-Inf, Inf), + targetCohortTable = "cohort2")) expect_error(requireCohortIntersect(cohort = cdm$cohort1, intersections = c(11, 10), targetCohortId = 1, @@ -331,3 +331,46 @@ test_that("test indexes - postgres", { omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with("my_cohort")) CDMConnector::cdmDisconnect(cdm = cdm) }) + +test_that("codelists", { + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4, seed = 1) |> + omock::mockObservationPeriod(seed = 1) |> + omock::mockCohort(name = c("cohort1"), numberCohorts = 2, seed = 1) |> + omock::mockConditionOccurrence() + cdm <- cdm_local |> copyCdm() + + cdm$cohort2 <- conceptCohort(cdm, list("a" = 194152L, "b" = 4151660L), name = "cohort2") + # Only inclusion codes + cdm$cohort3 <- requireCohortIntersect(cohort = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf), + name = "cohort3") + expect_identical( + attr(cdm$cohort3, "cohort_codelist") |> dplyr::collect(), + dplyr::tibble( + cohort_definition_id = 1:2, + codelist_name = "a", + concept_id = 194152L, + type = "inclusion criteria" + ) + ) + + # no inlcusion codes + cdm$cohort4 <- requireCohortIntersect(cohort = cdm$cohort2, + targetCohortTable = "cohort1", + targetCohortId = 1, + window = c(-Inf, Inf), + name = "cohort4") + expect_identical( + attr(cdm$cohort4, "cohort_codelist") |> dplyr::collect(), + dplyr::tibble( + cohort_definition_id = 1:2L, + codelist_name = c("a", "b"), + concept_id = c(194152L, 4151660L), + type = "index event" + ) + ) + CDMConnector::cdmDisconnect(cdm = cdm) +}) diff --git a/tests/testthat/test-requireConceptIntersect.R b/tests/testthat/test-requireConceptIntersect.R index 88d5723b..f0d28be 100644 --- a/tests/testthat/test-requireConceptIntersect.R +++ b/tests/testthat/test-requireConceptIntersect.R @@ -30,9 +30,9 @@ test_that("require flag in concept", { start_cols <- colnames(cdm$cohort1) cdm$cohort3 <- requireConceptIntersect(cohort = cdm$cohort1, - conceptSet = list(a = 1L), - window = c(-Inf, Inf), - name = "cohort3") + conceptSet = list(a = 1L), + window = c(-Inf, Inf), + name = "cohort3") expect_identical(colnames(cdm$cohort3), colnames(cdm$cohort1)) expect_true(all(cdm$cohort3 |> dplyr::pull("subject_id") == 1L)) expect_true(all(cdm$cohort3 |> dplyr::pull("cohort_start_date") |> sort() == @@ -70,10 +70,10 @@ test_that("require flag in concept", { "Initial qualifying events"))) # censor date cdm$cohort5 <- requireConceptIntersect(cohort = cdm$cohort1, - conceptSet = list(a = 1L), - window = c(-Inf, Inf), - censorDate = "cohort_end_date", - name = "cohort5") + conceptSet = list(a = 1L), + window = c(-Inf, Inf), + censorDate = "cohort_end_date", + name = "cohort5") expect_true(cdm$cohort5 |> dplyr::pull("subject_id") |> length() == 0) expect_true(all(omopgenerics::attrition(cdm$cohort5)$reason == c("Initial qualifying events", @@ -83,8 +83,8 @@ test_that("require flag in concept", { # name cdm$cohort1 <- requireConceptIntersect(cohort = cdm$cohort1, - conceptSet = list(a = 1L), - window = c(-Inf, Inf)) + conceptSet = list(a = 1L), + window = c(-Inf, Inf)) expect_true(all(omopgenerics::attrition(cdm$cohort1)$reason == c("Initial qualifying events", "Concept a between -Inf & Inf days relative to cohort_start_date between 1 and Inf", @@ -105,16 +105,16 @@ test_that("require flag in concept", { # expected errors # only support one concept at the moment - expect_error( - requireConceptIntersect(cohort = cdm$cohort1, - conceptSet = list(a = 1L, b = 2L), - window = c(-Inf, Inf)) - ) - expect_error( - requireConceptIntersect(cohort = cdm$cohort1, - conceptSet = NULL, - window = c(-Inf, Inf)) - ) + expect_error( + requireConceptIntersect(cohort = cdm$cohort1, + conceptSet = list(a = 1L, b = 2L), + window = c(-Inf, Inf)) + ) + expect_error( + requireConceptIntersect(cohort = cdm$cohort1, + conceptSet = NULL, + window = c(-Inf, Inf)) + ) PatientProfiles::mockDisconnect(cdm) }) @@ -343,3 +343,45 @@ test_that("test indexes - postgres", { omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with("my_cohort")) CDMConnector::cdmDisconnect(cdm = cdm) }) + +test_that("codelists", { + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4, seed = 1) |> + omock::mockObservationPeriod(seed = 1) |> + omock::mockCohort(name = c("cohort1"), numberCohorts = 2, seed = 1) |> + omock::mockConditionOccurrence() + cdm <- cdm_local |> copyCdm() + + cdm$cohort2 <- conceptCohort(cdm, list("a" = 194152L, "b" = 4151660L), name = "cohort2") + # Only inclusion codes + cdm$cohort3 <- requireConceptIntersect(cohort = cdm$cohort1, + conceptSet = list("a" = 194152L), + intersections = 0, + window = c(-Inf, 0), + name = "cohort3") + expect_identical( + attr(cdm$cohort3, "cohort_codelist") |> dplyr::collect(), + dplyr::tibble( + cohort_definition_id = 1:2, + codelist_name = "a", + concept_id = 194152L, + type = "inclusion criteria" + ) + ) + + # no inlcusion codes + cdm$cohort4 <- requireConceptIntersect(cohort = cdm$cohort2, + conceptSet = list("a" = 194152L), + window = c(-Inf, Inf), + name = "cohort4") + expect_identical( + attr(cdm$cohort4, "cohort_codelist") |> dplyr::collect(), + dplyr::tibble( + cohort_definition_id = c(1L, 1L, 2L, 2L), + codelist_name = c("a", "a", "b", "a"), + concept_id = c(194152L, 194152L, 4151660L, 194152L), + type = c("index event", "inclusion criteria", "index event", "inclusion criteria") + ) + ) + CDMConnector::cdmDisconnect(cdm = cdm) +})