Skip to content

Commit

Permalink
copy DL to result in ID censored data
Browse files Browse the repository at this point in the history
includes many other small updates relevant to IDing censored data
  • Loading branch information
cristinamullin committed Dec 29, 2023
1 parent 2006da7 commit 05ac102
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 116 deletions.
Binary file removed .RDataTmp
Binary file not shown.
76 changes: 68 additions & 8 deletions R/CensoredDataSuite.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,70 @@ TADA_IDCensoredData <- function(.data) {
"TADA.ResultMeasureValueDataTypes.Flag"
)
TADA_CheckColumns(.data, expected_cols)

# Move detection limit value and unit to TADA Result Measure Value and Unit columns
# this first step copies the detection limit value over when the result is blank (NA),
# TADA.DetectionQuantitationLimitMeasure.MeasureValue is not blank, and the
# TADA.ResultMeasureValueDataTypes.Flag is not Text
# Important note: TADA result values are NA for text even though they are not NA in the original result value
.data$TADA.ResultMeasureValue <- ifelse(
is.na(.data$TADA.ResultMeasureValue)
& !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue)
& .data$TADA.ResultMeasureValueDataTypes.Flag != "Text",
.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue,
.data$TADA.ResultMeasureValue)
# this does the same as above for the units
.data$TADA.ResultMeasure.MeasureUnitCode <- ifelse(
is.na(.data$TADA.ResultMeasure.MeasureUnitCode)
& !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode)
& .data$TADA.ResultMeasureValueDataTypes.Flag != "Text",
.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode,
.data$TADA.ResultMeasure.MeasureUnitCode)
.data$TADA.ResultMeasureValueDataTypes.Flag <- ifelse(
.data$TADA.ResultMeasureValueDataTypes.Flag == "NA - Not Available"
& !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue),
"Result Value/Unit Copied from Detection Limit",
.data$TADA.ResultMeasureValueDataTypes.Flag)

# this copies the detection limit value and unit over to the TADA result value and unit
# when the result value is TEXT but there is a specific text value that indicates
# the result is censored (BPQL, BDL, ND)
# and the TADA.DetectionQuantitationLimitMeasure.MeasureValue is provided
.data$TADA.ResultMeasureValueDataTypes.Flag <- ifelse(
.data$TADA.ResultMeasureValueDataTypes.Flag == "Text" &
.data$ResultMeasureValue == "BPQL" |
.data$ResultMeasureValue == "BDL" |
.data$ResultMeasureValue == "ND" &
!is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue),
"Result Value/Unit Copied from Detection Limit",
.data$TADA.ResultMeasureValueDataTypes.Flag)
.data$TADA.ResultMeasureValue <- ifelse(
is.na(.data$TADA.ResultMeasureValue)
& !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue)
& .data$ResultMeasureValue == "BPQL" |
.data$ResultMeasureValue == "BDL" |
.data$ResultMeasureValue == "ND" ,
.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue,
.data$TADA.ResultMeasureValue)
# this does the same as above for the units
.data$TADA.ResultMeasure.MeasureUnitCode <- ifelse(
!is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode)
& .data$ResultMeasureValue == "BPQL" |
.data$ResultMeasureValue == "BDL" |
.data$ResultMeasureValue == "ND" ,
.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode,
.data$TADA.ResultMeasure.MeasureUnitCode)

# If user has not previously run TADA_FlagMeasureQualifierCode, run it here
# to add column TADA.MeasureQualifierCode.Flag to allow for using user-supplied
# Result Measure Qualifier codes to identify censored samples.
if (!"TADA.MeasureQualifierCode.Flag" %in% names(.data)) {
data_mq_flag <- TADA_FlagMeasureQualifierCode(.data)
} else {
data_mq_flag <- .data
}

## Run TADA_FlagMeasureQualifierCode to add column TADA.MeasureQualifier.Flag to allow for using user-supplied Result Measure Qualifier codes to identify censored samples.
data_mq_flag <- TADA_FlagMeasureQualifierCode(.data)


## Identify censored data using TADA.ResultMeasureValueDataTypes.Flag and TADA.MeasureQualifierCode.Flag
## Identify censored data using TADA.ResultMeasureValueDataTypes.Flag and TADA.MeasureQualifierCode.Flag
cens_rm_flag <- data_mq_flag %>% dplyr::filter(TADA.ResultMeasureValueDataTypes.Flag == "Result Value/Unit Copied from Detection Limit")
cens_mq_flag <- data_mq_flag %>% dplyr::filter(TADA.MeasureQualifierCode.Flag %in% c("Non-Detect", "Over-Detect")) %>%
dplyr::filter(!ResultIdentifier %in% cens_rm_flag$ResultIdentifier)
Expand Down Expand Up @@ -82,7 +140,7 @@ TADA_IDCensoredData <- function(.data) {
# NOTE that at this point, TADA.Detection_Type may be NA if there are detection conditions in dataset that are not present in domain table
if (any(cens$TADA.Detection_Type[!is.na(cens$TADA.Detection_Type)] == "ResultDetectionConditionText missing")) {
missing_detcond <- length(cens$TADA.Detection_Type[cens$TADA.Detection_Type == "ResultDetectionConditionText missing"])
print(paste0("TADA_IDCensoredData: There are ", missing_detcond, " results in your dataset that are missing ResultDetectionConditionText. Unless the ResultMeasureValue = 'ND' (indicating non-detect), TADA requires BOTH ResultDetectionConditionText and DetectionQuantitationLimitTypeName fields to be populated in order to categorize censored data. Please contact the TADA Admins to resolve."))
print(paste0("TADA_IDCensoredData: There are ", missing_detcond, " results in your dataset that are missing ResultDetectionConditionText. When TADA cannot clearly ID the result as a non-detect based in the metadata provided, TADA requires BOTH ResultDetectionConditionText and DetectionQuantitationLimitTypeName fields to be populated in order to categorize censored data. Please contact the TADA Admins to resolve."))
}

## Let user know when one or more result detection conditions are not in the ref table
Expand Down Expand Up @@ -135,6 +193,7 @@ TADA_IDCensoredData <- function(.data) {
print("TADA_IDCensoredData: No censored data detected in your dataset. Returning input dataframe with new column TADA.CensoredData.Flag set to Uncensored")
}

cens.check <- TADA_OrderCols(cens.check)
return(cens.check)
}

Expand Down Expand Up @@ -191,7 +250,8 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multi
stop("Please provide a multiplier for the upper detection limit handling method of 'multiplier'")
}

# If user has not previously run TADA_IDCensoredData function, run it here to get required columns
# If user has not previously run TADA_IDCensoredData function, run it here to get required columns and to copy
# detection limit to result value
if (!"TADA.CensoredData.Flag" %in% names(.data)) {
cens.data <- TADA_IDCensoredData(.data)
} else {
Expand Down Expand Up @@ -238,7 +298,7 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multi
}

.data <- plyr::rbind.fill(nd, od, all_others)
.data <- TADA_OrderCols(.data)
}
.data <- TADA_OrderCols(.data)
return(.data)
}
5 changes: 2 additions & 3 deletions R/Maintenance.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ TADA_UpdateExampleData <- function() {
od_method = "as-is",
od_multiplier = "null"
)
y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
!is.na(TADA.ResultMeasureValue))
# uses default ref = TADA_GetSynonymRef()
Data_6Tribes_5y_Harmonized <- TADA_HarmonizeSynonyms(y)
Expand Down
17 changes: 9 additions & 8 deletions R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
#' rows with invalid or nonstandardized characteristic-method speciation combinations.
#' Default is flaggedonly = FALSE.
#'
#' #' The “Not Reviewed” value within "TADA.ResultAboveUpperThreshold.Flag" means
#' The “Not Reviewed” value within "TADA.ResultAboveUpperThreshold.Flag" means
#' that the EPA WQX team has not yet reviewed the combinations
#' (see https://cdx.epa.gov/wqx/download/DomainValues/QAQCCharacteristicValidation.CSV).
#' The WQX team plans to review and update these new combinations quarterly.
Expand Down Expand Up @@ -506,16 +506,17 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
dplyr::select(ActivityTypeCode, TADA.ActivityType.Flag)

# identify any Activity Type Codes not in reference table
# these are likely USGS only values
codes <- unique(.data$ActivityTypeCode)
if (any(!codes %in% qc.ref$ActivityTypeCode)) {
missing_codes <- codes[!codes %in% qc.ref$ActivityTypeCode]
missing_codes_df <- data.frame(
ActivityTypeCode = missing_codes,
TADA.ActivityType.Flag = "QC_uncategorized"
TADA.ActivityType.Flag = "Not Reviewed"
)
qc.ref <- rbind(qc.ref, missing_codes_df)
missing_codes <- paste(missing_codes, collapse = ", ")
print(paste0("ActivityTypeCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the ActivityType WQX domain table. These data records are placed under the TADA.ActivityType.Flag: 'QC_uncategorized'. Please contact TADA administrators to resolve."))
print(paste0("ActivityTypeCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the ActivityType WQX domain table. These data records are placed under the TADA.ActivityType.Flag: 'Not Reviewed'. Please contact TADA administrators to resolve."))
}

# populate flag column in data
Expand Down Expand Up @@ -564,8 +565,8 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
#' This function removes rows where the result value is not numeric to
#' prepare a dataframe for quantitative analyses. Ideally, this function should
#' be run after other data cleaning, QA/QC, and harmonization steps are
#' completed using other TADA package functions, or manually. Specifically, .
#' this function removes rows with "Text","Coerced to NA", and "NA - Not Applicable"
#' completed using other TADA package functions, or manually. Specifically,
#' this function removes rows with "Text" and "NA - Not Available"
#' in the TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
#' TADA.ResultMeasureValue column.
#'
Expand Down Expand Up @@ -593,9 +594,9 @@ TADA_AutoFilter <- function(.data) {
"ActivityTypeCode"
))

autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
!is.na(TADA.ResultMeasureValue)) # &
# TADA.ActivityMediaName == "WATER")

Expand Down Expand Up @@ -740,7 +741,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
}

# label NA MeasureQualifierCode values as "NA - Not Available" in TADA.MeasureQualifierCode.Flag column
flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "NA - Not Applicable"
flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "NA - Not Available"

# clean dataframe
# if clean = FALSE, return full dataframe
Expand Down
2 changes: 1 addition & 1 deletion R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,7 @@ TADA_FindPotentialDuplicatesSingleOrg <- function(.data) {
# tack depth columns onto additional grouping columns
colss <- c("OrganizationIdentifier", "MonitoringLocationIdentifier", "ActivityStartDate", "ActivityStartTime.Time", "ActivityTypeCode", "TADA.CharacteristicName", "SubjectTaxonomicName", "TADA.ResultSampleFractionText", "TADA.ResultMeasureValue", depthcols)

# find where the grouping using the columns above results in more than result identifier
# find where the grouping using the columns above results in more than one result identifier
dups_sum_org <- .data %>%
dplyr::group_by(dplyr::across(dplyr::any_of(colss))) %>%
dplyr::summarise(numres = length(unique(ResultIdentifier))) %>%
Expand Down
4 changes: 0 additions & 4 deletions R/Tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@ TADA_Stats <- function(.data, group_cols = c("TADA.ComparableDataIdentifier")) {
.data <- TADA_IDCensoredData(.data)
}

if (!"TADA.CensoredData.Flag" %in% names(.data)) {
.data <- TADA_IDCensoredData(.data)
}

if ("TADA.NutrientSummation.Flag" %in% names(.data)) {
print("Note: Your dataset contains TADA-generated total nutrient results, which have fewer columns populated with metadata. This might affect how groups are displayed in the stats table.")
}
Expand Down
Loading

0 comments on commit 05ac102

Please sign in to comment.