From 0751a2fecef8558d8411f03c0fe726b79122f708 Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Sun, 8 Dec 2024 15:44:50 +0000 Subject: [PATCH 1/2] match description --- R/matchCohorts.R | 22 +++++++++------------- R/measurementCohort.R | 2 +- man/matchCohorts.Rd | 9 +++++++++ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/R/matchCohorts.R b/R/matchCohorts.R index 377388c..002d98e 100644 --- a/R/matchCohorts.R +++ b/R/matchCohorts.R @@ -3,6 +3,15 @@ #' @description #' `matchCohorts()` generate a new cohort matched to individuals in an #' existing cohort. Individuals can be matched based on year of birth and sex. +#' Matching is done at the record level, so if individuals have multiple +#' cohort entries they can be matched to different individuals for each of their +#' records. +#' +#' Two new cohorts will be created when matching. The first is those +#' cohort entries which were matched ("_sampled" is added to the original +#' cohort name for this cohort). The other is the matches found from the +#' database population ("_matched" is added to the original cohort name +#' for this cohort). #' #' @inheritParams cohortDoc #' @inheritParams cohortIdSubsetDoc @@ -48,19 +57,6 @@ matchCohorts <- function(cohort, omopgenerics::assertLogical(matchSex, length = 1) omopgenerics::assertLogical(matchYearOfBirth, length = 1) - # Check if there are repeated people within the cohort - y <- cohort |> - dplyr::filter(.data$cohort_definition_id %in% cohortId) |> - dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |> - dplyr::filter(dplyr::n() >= 2) |> - dplyr::ungroup() |> - dplyr::tally() |> - dplyr::pull() - if (y != 0) { - cli::cli_warn( - "Multiple records per person detected. The matchCohorts() function is designed to operate under the assumption that there is only one record per person within each cohort. If this assumption is not met, each record will be treated independently. As a result, the same individual may be matched multiple times, leading to inconsistent and potentially misleading results." - ) - } # table prefix tablePrefix <- omopgenerics::tmpPrefix() diff --git a/R/measurementCohort.R b/R/measurementCohort.R index ae7cbc7..16fd648 100644 --- a/R/measurementCohort.R +++ b/R/measurementCohort.R @@ -119,7 +119,7 @@ measurementCohort <- function(cdm, if (ud > 0) { cli::cli_inform( - c("x" = "{.strong {ud}} concept{?s} excluded because don't correspond to the `Measurement` domain.") + c("x" = "{.strong {ud}} concept{?s} excluded because they don't correspond to the `Measurement` domain.") ) } diff --git a/man/matchCohorts.Rd b/man/matchCohorts.Rd index 533d429..4a55f2d 100644 --- a/man/matchCohorts.Rd +++ b/man/matchCohorts.Rd @@ -38,6 +38,15 @@ A cohort table. \description{ \code{matchCohorts()} generate a new cohort matched to individuals in an existing cohort. Individuals can be matched based on year of birth and sex. +Matching is done at the record level, so if individuals have multiple +cohort entries they can be matched to different individuals for each of their +records. + +Two new cohorts will be created when matching. The first is those +cohort entries which were matched ("_sampled" is added to the original +cohort name for this cohort). The other is the matches found from the +database population ("_matched" is added to the original cohort name +for this cohort). } \examples{ \donttest{ From 40c0fa25a55af06463529bbe5791b4e7761b52ec Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Sun, 8 Dec 2024 15:49:35 +0000 Subject: [PATCH 2/2] fix test --- tests/testthat/test-matchCohorts.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-matchCohorts.R b/tests/testthat/test-matchCohorts.R index 7edf3be..50d8bfa 100644 --- a/tests/testthat/test-matchCohorts.R +++ b/tests/testthat/test-matchCohorts.R @@ -71,8 +71,6 @@ test_that("matchCohorts runs without errors", { # expect warnings cdm <- mockCohortConstructor(nPerson = 1000) - expect_warning(matchCohorts(cohort = cdm$cohort1, - name = "new_cohort")) PatientProfiles::mockDisconnect(cdm) })