Skip to content

Commit

Permalink
Merge pull request #413 from OHDSI/omock-new-release
Browse files Browse the repository at this point in the history
Mock data in tests to work with omock development version
  • Loading branch information
edward-burn authored Dec 19, 2024
2 parents 00a2db1 + f1146a2 commit 7502eca
Show file tree
Hide file tree
Showing 18 changed files with 145 additions and 225 deletions.
1 change: 1 addition & 0 deletions CohortConstructor.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Version: 1.0
ProjectId: 27e959ec-87b2-4ff1-ac1d-3ea886296a0d

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,5 @@ Depends:
R (>= 4.1)
URL: https://ohdsi.github.io/CohortConstructor/, https://github.com/OHDSI/CohortConstructor
LazyData: true
Remotes:
ohdsi/omock
140 changes: 68 additions & 72 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ conceptCohort <- function(cdm,
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)

if(overlap == "merge"){
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
}

if(overlap == "extend"){
Expand All @@ -210,7 +210,7 @@ conceptCohort <- function(cdm,
extendOverlap(name = name) |>
omopgenerics::recordCohortAttrition(reason = "Add overlapping records")

# adding days might mean we no longer satisfy cohort requirements
# adding days might mean we no longer satisfy cohort requirements
cli::cli_inform(c("i" = "Re-appplying cohort requirements."))
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)
}
Expand Down Expand Up @@ -492,86 +492,82 @@ extendOverlap <- function(cohort,
# Because once we add to a record this may cause a new overlap
# will do a while loop until all overlaps are resolved
while(hasOverlap(cohort)){
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
dplyr::mutate(record_id = dplyr::row_number()) |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[1])

# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap,
cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)
# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

CDMConnector::dropTable(cdm = cdm,
name = workingTblNames)
cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days, na.rm = TRUE))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)

CDMConnector::dropTable(cdm = cdm, name = workingTblNames)
}

cohort

}

hasOverlap <- function(cohort){
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()

if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
return(TRUE)
} else {
} else {
return(FALSE)
}

}

}
8 changes: 4 additions & 4 deletions R/intersectCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,10 @@ joinOverlap <- function(cohort,
cdm <- omopgenerics::cdmReference(cohort)

start <- cohort |>
dplyr::select(by, "date" := !!startDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!startDate) |>
dplyr::mutate("date_id" = -1)
end <- cohort |>
dplyr::select(by, "date" := !!endDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!endDate) |>
dplyr::mutate("date_id" = 1)
if (gap > 0) {
end <- end |>
Expand All @@ -357,7 +357,7 @@ joinOverlap <- function(cohort,
dplyr::compute(temporary = FALSE, name = workingTbl)

x <- x |>
dplyr::group_by(dplyr::pick(by)) |>
dplyr::group_by(dplyr::pick(dplyr::all_of(by))) |>
dplyr::arrange(.data$date, .data$date_id) |>
dplyr::mutate(
"cum_id" = cumsum(.data$date_id),
Expand Down Expand Up @@ -410,7 +410,7 @@ joinAll <- function(cohort,
}

x <- cohort |>
dplyr::group_by(dplyr::across(by)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
dplyr::summarise(
cohort_start_date =
min(.data$cohort_start_date, na.rm = TRUE),
Expand Down
2 changes: 1 addition & 1 deletion R/requireCohortIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ requireCohortIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
2 changes: 1 addition & 1 deletion R/requireConceptIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ requireConceptIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
2 changes: 1 addition & 1 deletion R/requireTableIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ requireTableIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
3 changes: 1 addition & 2 deletions tests/testthat/test-addIndex.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@

test_that("local tibble and duckdb test - will do nothing for these", {
skip_on_cran()
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
expect_no_error(cdm$cohort <- cdm$cohort |>
addCohortTableIndex())
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-collapseCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("simple example", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
Expand Down Expand Up @@ -70,7 +70,7 @@ test_that("simple example", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
Expand Down Expand Up @@ -112,7 +112,7 @@ test_that("out of observation", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
Expand Down Expand Up @@ -169,7 +169,7 @@ test_that("out of observation", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
Expand Down
Loading

0 comments on commit 7502eca

Please sign in to comment.