Skip to content

Commit

Permalink
Merge pull request #94 from oxford-pharmacoepi/trim_demographics
Browse files Browse the repository at this point in the history
Trim demographics
  • Loading branch information
edward-burn authored Apr 16, 2024
2 parents 84c3a14 + bffe71b commit 61478c4
Show file tree
Hide file tree
Showing 8 changed files with 859 additions and 1 deletion.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(cohortCodelist)
export(cohortCount)
export(collapseCohort)
export(conceptCohort)
export(demographicsCohort)
export(endDateColumn)
export(getIdentifier)
export(intersectCohorts)
Expand All @@ -26,6 +27,7 @@ export(settings)
export(splitOverlap)
export(startDateColumn)
export(tableName)
export(trimDemographics)
export(trimToDateRange)
export(unionCohorts)
importFrom(PatientProfiles,endDateColumn)
Expand Down
58 changes: 58 additions & 0 deletions R/demographicsCohort.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Create the observation period cohort
#'
#' @param cdm A cdm_reference.
#' @param name Name of the new cohort_table object.
#' @param ageRange A list of minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only
#' those with that sex will be included.
#' @param minPriorObservation A minimum number of prior observation days in
#' the database.
#' @param minFutureObservation A minimum number of future observation days in
#' the database.
#'
#' @return The cohort with the observation period
#'
#' @export
#'
demographicsCohort <- function(cdm,
name,
ageRange = NULL,
sex = NULL,
minPriorObservation = NULL,
minFutureObservation = NULL) {
# initial checks
cdm <- validateCdm(cdm)
name <- validateName(name)

cdm[[name]] <- cdm$observation_period |>
dplyr::inner_join(
cdm$person |> dplyr::select("person_id") |> dplyr::distinct(),
by = "person_id"
) |>
dplyr::select(
"subject_id" = "person_id",
"cohort_start_date" = "observation_period_start_date",
"cohort_end_date" = "observation_period_end_date"
) |>
dplyr::mutate("cohort_definition_id" = 1) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(
cohortSetRef = dplyr::tibble(
"cohort_definition_id" = 1, "cohort_name" = "demographics"
),
cohortAttritionRef = NULL,
cohortCodelistRef = NULL
)

cdm[[name]] <- trimDemographics(
cohort = cdm[[name]],
cohortId = NULL,
ageRange = ageRange,
sex = sex,
minPriorObservation = minPriorObservation,
minFutureObservation = minFutureObservation,
name = name
)

return(cdm[[name]])
}
310 changes: 310 additions & 0 deletions R/trimDemographics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
#' Restrict cohort on patient demographics
#'
#' @param cohort A cohort table in a cdm reference.
#' @param cohortId Vector of cohort definition ids to include. If NULL, all
#' cohort definition ids will be used.
#' @param ageRange A list of minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only
#' those with that sex will be included.
#' @param minPriorObservation A minimum number of prior observation days in
#' the database.
#' @param minFutureObservation A minimum number of future observation days in
#' the database.
#' @param name Name of the new cohort with the demographic requirements.
#'
#' @return The cohort table with only records for individuals satisfying the
#' demographic requirements
#'
#' @export
#'
trimDemographics <- function(cohort,
cohortId = NULL,
ageRange = NULL,
sex = NULL,
minPriorObservation = NULL,
minFutureObservation = NULL,
name = tableName(cohort)) {
# initial validation
cohort <- validateCohortTable(cohort, TRUE)
cohortId <- validateCohortId(cohortId, settings(cohort)$cohort_definition_id)
ageRange <- validateAgeRange(ageRange)
sex <- validateSex(sex)
minPriorObservation <- validateMinPriorObservation(minPriorObservation)
minFutureObservation <- validateMinFutureObservation(minFutureObservation)
name <- validateName(name)

cdm <- omopgenerics::cdmReference(cohort)
tablePrefix <- omopgenerics::tmpPrefix()

cli::cli_inform(c("i" = "Building new trimmed cohort"))

cohort <- cohort |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId)

if (!is.null(ageRange)) {
cli::cli_inform(c("Adding birth date"))
cohort <- cohort |>
PatientProfiles::addDateOfBirth(name = "date_0") %>%
dplyr::mutate(!!!datesAgeRange(ageRange))
}
if (!is.null(minPriorObservation) |
!is.null(minFutureObservation) |
!is.null(sex)) {
cli::cli_inform(c("Adding demographics information"))
cohort <- cohort |>
PatientProfiles::addDemographics(
age = FALSE,
sex = !is.null(sex),
priorObservation = !is.null(minPriorObservation),
priorObservationType = "date",
futureObservation = !is.null(minFutureObservation),
futureObservationType = "date"
)
}

newSettings <- settings(cohort) |>
getNewSettings(
cohortId, ageRange, sex, minPriorObservation, minFutureObservation
)

# insert settings
nm <- omopgenerics::uniqueTableName(tablePrefix)
cdm <- omopgenerics::insertTable(
cdm = cdm,
name = nm,
table = newSettings |> dplyr::select(dplyr::any_of(c(
"cohort_definition_id", "require_min_age", "require_max_age",
"require_sex", "require_min_prior_observation",
"require_min_future_observation", "new_cohort_definition_id"
)))
)

cli::cli_inform(c("Creating initial cohort"))
cohort <- cohort |>
dplyr::inner_join(cdm[[nm]], by = "cohort_definition_id") |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id") |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(
cohortSetRef = newSettings |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id"),
cohortAttritionRef = attrition(cohort) |>
dplyr::inner_join(
newSettings |>
dplyr::select("cohort_definition_id", "new_cohort_definition_id"),
by = "cohort_definition_id"
) |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id"),
cohortCodelistRef = attr(cohort, "cohort_codelist") |>
dplyr::collect() |>
dplyr::inner_join(
newSettings |>
dplyr::select("cohort_definition_id", "new_cohort_definition_id"),
by = "cohort_definition_id"
) |>
dplyr::select(-"cohort_definition_id") |>
dplyr::rename("cohort_definition_id" = "new_cohort_definition_id")
)

if (!is.null(sex)) {
cli::cli_inform(c("Trim sex"))
cohort <- cohort |>
dplyr::filter(
tolower(.data$sex) == .data$require_sex |
tolower(.data$require_sex) == "both"
) |>
dplyr::select(-c("sex", "require_sex")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Restrict sex")
}
if (!is.null(ageRange)) {
cli::cli_inform(c("Trim age"))
cohort <- cohort %>%
dplyr::mutate(
!!!caseAge(ageRange),
"cohort_start_date" = dplyr::if_else(
.data$cohort_start_date <= .data$new_cohort_start_date,
.data$new_cohort_start_date,
.data$cohort_start_date
),
"cohort_end_date" = dplyr::if_else(
.data$cohort_end_date <= .data$new_cohort_end_date,
.data$cohort_end_date,
.data$new_cohort_end_date
)
) |>
dplyr::select(-c(
dplyr::starts_with("date_"), "require_min_age", "require_max_age",
"new_cohort_start_date", "new_cohort_end_date"
)) |>
dplyr::filter(.data$cohort_start_date <= .data$cohort_end_date) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Trim age_group")
}
if (!is.null(minPriorObservation)) {
cli::cli_inform(c("Trim prior observation"))
cohort <- cohort %>%
dplyr::mutate(
"new_cohort_start_date" = as.Date(!!CDMConnector::dateadd(
date = "prior_observation",
number = "require_min_prior_observation",
interval = "day"
)),
"cohort_start_date" = dplyr::if_else(
.data$new_cohort_start_date >= .data$cohort_start_date,
.data$new_cohort_start_date,
.data$cohort_start_date
)
) |>
dplyr::filter(.data$cohort_start_date <= .data$cohort_end_date) |>
dplyr::select(-c("require_min_prior_observation", "prior_observation", "new_cohort_start_date")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Trim prior_observation")
}
if (!is.null(minFutureObservation)) {
cli::cli_inform(c("Trim future observation"))
cohort <- cohort %>%
dplyr::filter(
!!CDMConnector::datediff(
start = "cohort_start_date",
end = "future_observation",
interval = "day"
) >=
.data$require_min_future_observation
) |>
dplyr::select(-c("require_min_future_observation", "future_observation")) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition("Require future_observation")
}

# TODO update attrition names to be more coherent with the age groups, sex and so

cli::cli_inform(c("v" = "Cohort trimmed"))
return(cohort)
}

datesAgeRange <- function(ageRange) {
qA <- list()
values <- lapply(ageRange, function(x) {
x[2] <- x[2] + 1
return(x)
}) |>
unlist() |>
unique()
values <- values[!is.infinite(values)]
values <- values[values != 0]
glue::glue("as.Date(local(CDMConnector::dateadd('date_0', {values}, interval = 'year')))") |>
rlang::parse_exprs() |>
rlang::set_names(glue::glue("date_{values}"))
}
getNewSettings <- function(set, cohortId, age, sex, prior, future) {
if (length(age) == 0) {
ageId <- NULL
} else {
ageId <- seq_along(age)
}
if (length(prior) == 0) {
prior <- NULL
} else {
prior <- as.integer(prior)
}
if (length(future) == 0) {
future <- NULL
} else {
future <- as.integer(future)
}
sets <- tidyr::expand_grid(
"cohort_definition_id" = cohortId,
"require_age" = ageId,
"require_sex" = sex,
"require_min_prior_observation" = prior,
"require_min_future_observation" = future
)
if (!is.null(ageId)) {
ageMin <- lapply(age, function(x) {x[1]}) |> unlist()
ageMax <- lapply(age, function(x) {x[2]}) |> unlist()
sets <- sets |>
dplyr::inner_join(
dplyr::tibble(
"require_age" = ageId,
"require_min_age" = ageMin,
"require_max_age" = ageMax
),
by = "require_age"
) |>
dplyr::select(-"require_age")
}
sets <- sets |>
dplyr::select(dplyr::any_of(c(
"cohort_definition_id", "require_min_age", "require_max_age",
"require_sex", "require_min_prior_observation",
"require_min_future_observation"
)))
sets <- set |>
dplyr::inner_join(
sets, by = "cohort_definition_id", suffix = c(".original", "")
) |>
dplyr::mutate("new_cohort_definition_id" = dplyr::row_number())
if (!is.null(age)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_age, "_",
.data$require_max_age
))
}
if (!is.null(sex)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_sex
))
}
if (!is.null(prior)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_prior_observation
))
}
if (!is.null(future)) {
sets <- sets |>
dplyr::mutate("cohort_name" = paste0(
.data$cohort_name, "_", .data$require_min_future_observation
))
}
sets <- sets |>
# we will need a new release of omopgenerics so we can change ::: -> ::
dplyr::mutate("cohort_name" = omopgenerics:::toSnakeCase(.data$cohort_name))
return(sets)
}
caseAge <- function(age) {
prepareColStart <- function(x, col) {
num <- x |> unlist() |> unique() |> as.character() |> tolower()
x <- paste0("date_", num)
x <- paste0(".data$", col, " == ", num, " ~ .data$", x) |>
paste0(collapse = ",")
x <- paste0("dplyr::case_when(", x, ")") |>
rlang::parse_exprs() |>
rlang::set_names(c("new_cohort_start_date"))
return(x)
}
prepareColEnd <- function(x, col) {
num <- unique(unlist(x))
infFlag <- any(is.infinite(num))
num <- num[!is.infinite(num)]
x <- paste0(".data$", col, " == ", as.character(num), " ~ as.Date(local(CDMConnector::dateadd(date = 'date_", as.character(num+1) ,"', number = -1, interval = 'day')))")
if (infFlag) {
x <- c(x, paste0("is.infinite(.data$", col, ") ~ .data$cohort_end_date"))
}
x <- paste0(x, collapse = ", ")
x <- paste0("dplyr::case_when(", x, ")") |>
rlang::parse_exprs() |>
rlang::set_names("new_cohort_end_date")
return(x)
}
ageMin <- lapply(age, function(x){x[1]}) |>
prepareColStart("require_min_age")
ageMax <- lapply(age, function(x){x[2]}) |>
prepareColEnd("require_max_age")
c(ageMin, ageMax)
}
Loading

0 comments on commit 61478c4

Please sign in to comment.