copy DL to result in ID censored data

includes many other small updates relevant to IDing censored data
USEPA · Dec 29, 2023 · 05ac102 · 05ac102
1 parent 2006da7
commit 05ac102
Show file tree

Hide file tree

Showing 12 changed files with 136 additions and 116 deletions.
diff --git a/.RDataTmp b/.RDataTmp
diff --git a/R/CensoredDataSuite.R b/R/CensoredDataSuite.R
@@ -37,12 +37,70 @@ TADA_IDCensoredData <- function(.data) {
     "TADA.ResultMeasureValueDataTypes.Flag"
   )
   TADA_CheckColumns(.data, expected_cols)
+
+ # Move detection limit value and unit to TADA Result Measure Value and Unit columns
+ # this first step copies the detection limit value over when the result is blank (NA) but
+ # TADA.DetectionQuantitationLimitMeasure.MeasureValue is not and the
+ # TADA.ResultMeasureValueDataTypes.Flag is not Text
+ # Important note: TADA result values are NA for text even though they are not NA in the original result value
+ .data$TADA.ResultMeasureValue <- ifelse(
+   is.na(.data$TADA.ResultMeasureValue)
+   & !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue)
+   & .data$TADA.ResultMeasureValueDataTypes.Flag != "Text",
+   .data$TADA.DetectionQuantitationLimitMeasure.MeasureValue, 
+   .data$TADA.ResultMeasureValue)
+ # this does the same as above for the units
+ .data$TADA.ResultMeasure.MeasureUnitCode <- ifelse(
+   is.na(.data$TADA.ResultMeasure.MeasureUnitCode)
+   & !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode)
+   & .data$TADA.ResultMeasureValueDataTypes.Flag != "Text", 
+   .data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode, 
+   .data$TADA.ResultMeasure.MeasureUnitCode)
+ .data$TADA.ResultMeasureValueDataTypes.Flag <- ifelse(
+   .data$TADA.ResultMeasureValueDataTypes.Flag == "NA - Not Available"
+   & !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue),
+   "Result Value/Unit Copied from Detection Limit",
+   .data$TADA.ResultMeasureValueDataTypes.Flag)
+
+ # this copies det lim result value and unit over to TADA result value and unit
+ # when the result value is TEXT but there is a specific text value that indicates
+ # the result is censored (BPQL, BDL, ND)
+ # and the TADA.DetectionQuantitationLimitMeasure.MeasureValue is provided.
+ # %in% is used rather than chained == / | tests because & binds tighter than |
+ # in R (an unparenthesised chain applies the guards to only one code), and
+ # because %in% never returns NA, so rows with a blank ResultMeasureValue keep
+ # their existing flag, value and unit.
+ .data$TADA.ResultMeasureValueDataTypes.Flag <- ifelse(
+   .data$TADA.ResultMeasureValueDataTypes.Flag == "Text" &
+     .data$ResultMeasureValue %in% c("BPQL", "BDL", "ND") &
+     !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue),
+   "Result Value/Unit Copied from Detection Limit",
+   .data$TADA.ResultMeasureValueDataTypes.Flag)
+ # only fill the value when it is still NA and a detection limit value exists
+ .data$TADA.ResultMeasureValue <- ifelse(
+   is.na(.data$TADA.ResultMeasureValue)
+   & !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue)
+   & .data$ResultMeasureValue %in% c("BPQL", "BDL", "ND"),
+   .data$TADA.DetectionQuantitationLimitMeasure.MeasureValue,
+   .data$TADA.ResultMeasureValue)
+ # this does the same as above for the units; the unit is only replaced when a
+ # detection limit unit exists, so an existing unit is never overwritten with NA
+ .data$TADA.ResultMeasure.MeasureUnitCode <- ifelse(
+   !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode)
+   & .data$ResultMeasureValue %in% c("BPQL", "BDL", "ND"),
+   .data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode,
+   .data$TADA.ResultMeasure.MeasureUnitCode)
+
+ # If user has not previously run TADA_FlagMeasureQualifierCode, run it here
+ # to add column TADA.MeasureQualifierCode.Flag to allow for using user-supplied
+ # Result Measure Qualifier codes to identify censored samples.
+ if (!"TADA.MeasureQualifierCode.Flag" %in% names(.data)) {
+   data_mq_flag <- TADA_FlagMeasureQualifierCode(.data)
+ } else {
+   data_mq_flag <- .data
+ }
 
-  ## Run TADA_FlagMeasureQualifierCode to add column TADA.MeasureQualifier.Flag to allow for using user-supplied Result Measure Qualifier codes to identify censored samples.
- data_mq_flag <- TADA_FlagMeasureQualifierCode(.data)
-
-
-  ## Identify censored data using TADA.ResultMeasureValueDataTypes.Flag and TADA.MeasureQualifierCode.Flag
+ ## Identify censored data using TADA.ResultMeasureValueDataTypes.Flag and TADA.MeasureQualifierCode.Flag
   cens_rm_flag <- data_mq_flag %>% dplyr::filter(TADA.ResultMeasureValueDataTypes.Flag == "Result Value/Unit Copied from Detection Limit")
   cens_mq_flag <- data_mq_flag %>% dplyr::filter(TADA.MeasureQualifierCode.Flag %in% c("Non-Detect", "Over-Detect")) %>%
     dplyr::filter(!ResultIdentifier %in% cens_rm_flag$ResultIdentifier)
@@ -82,7 +140,7 @@ TADA_IDCensoredData <- function(.data) {
     # NOTE that at this point, TADA.Detection_Type may be NA if there are detection conditions in dataset that are not present in domain table
     if (any(cens$TADA.Detection_Type[!is.na(cens$TADA.Detection_Type)] == "ResultDetectionConditionText missing")) {
       missing_detcond <- length(cens$TADA.Detection_Type[cens$TADA.Detection_Type == "ResultDetectionConditionText missing"])
-      print(paste0("TADA_IDCensoredData: There are ", missing_detcond, " results in your dataset that are missing ResultDetectionConditionText. Unless the ResultMeasureValue = 'ND' (indicating non-detect), TADA requires BOTH ResultDetectionConditionText and DetectionQuantitationLimitTypeName fields to be populated in order to categorize censored data. Please contact the TADA Admins to resolve."))
+      print(paste0("TADA_IDCensoredData: There are ", missing_detcond, " results in your dataset that are missing ResultDetectionConditionText. When TADA cannot clearly ID the result as a non-detect based in the metadata provided, TADA requires BOTH ResultDetectionConditionText and DetectionQuantitationLimitTypeName fields to be populated in order to categorize censored data. Please contact the TADA Admins to resolve."))
     }
 
     ## Let user know when one or more result detection conditions are not in the ref table
@@ -135,6 +193,7 @@ TADA_IDCensoredData <- function(.data) {
     print("TADA_IDCensoredData: No censored data detected in your dataset. Returning input dataframe with new column TADA.CensoredData.Flag set to Uncensored")
   }
 
+  cens.check <- TADA_OrderCols(cens.check)
   return(cens.check)
 }
 
@@ -191,7 +250,8 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multi
     stop("Please provide a multiplier for the upper detection limit handling method of 'multiplier'")
   }
 
-  # If user has not previously run TADA_IDCensoredData function, run it here to get required columns
+  # If user has not previously run TADA_IDCensoredData function, run it here to get required columns and to copy
+  # detection limit to result value
   if (!"TADA.CensoredData.Flag" %in% names(.data)) {
     cens.data <- TADA_IDCensoredData(.data)
   } else {
@@ -238,7 +298,7 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multi
     }
 
     .data <- plyr::rbind.fill(nd, od, all_others)
-    .data <- TADA_OrderCols(.data)
   }
+  .data <- TADA_OrderCols(.data)
   return(.data)
 }
diff --git a/R/Maintenance.R b/R/Maintenance.R
@@ -56,9 +56,8 @@ TADA_UpdateExampleData <- function() {
     od_method = "as-is",
     od_multiplier = "null"
   )
-  y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
-    TADA.ResultMeasureValueDataTypes.Flag != "Text" &
-    TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
+  y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "Text" &
+    TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
     !is.na(TADA.ResultMeasureValue))
   # uses default ref = TADA_GetSynonymRef()
   Data_6Tribes_5y_Harmonized <- TADA_HarmonizeSynonyms(y)

diff --git a/R/ResultFlagsDependent.R b/R/ResultFlagsDependent.R
@@ -129,7 +129,7 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
 #' rows with invalid or nonstandardized characteristic-method speciation combinations.
 #' Default is flaggedonly = FALSE.
 #'
-#' #' The “Not Reviewed” value within "TADA.ResultAboveUpperThreshold.Flag" means
+#' The “Not Reviewed” value within "TADA.ResultAboveUpperThreshold.Flag" means
 #' that the EPA WQX team has not yet reviewed the combinations
 #' (see https://cdx.epa.gov/wqx/download/DomainValues/QAQCCharacteristicValidation.CSV).
 #' The WQX team plans to review and update these new combinations quarterly.
@@ -506,16 +506,17 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
     dplyr::select(ActivityTypeCode, TADA.ActivityType.Flag)
 
   # identify any Activity Type Codes not in reference table
+  # these are likely USGS only values
   codes <- unique(.data$ActivityTypeCode)
   if (any(!codes %in% qc.ref$ActivityTypeCode)) {
     missing_codes <- codes[!codes %in% qc.ref$ActivityTypeCode]
     missing_codes_df <- data.frame(
       ActivityTypeCode = missing_codes,
-      TADA.ActivityType.Flag = "QC_uncategorized"
+      TADA.ActivityType.Flag = "Not Reviewed"
     )
     qc.ref <- rbind(qc.ref, missing_codes_df)
     missing_codes <- paste(missing_codes, collapse = ", ")
-    print(paste0("ActivityTypeCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the ActivityType WQX domain table. These data records are placed under the TADA.ActivityType.Flag: 'QC_uncategorized'. Please contact TADA administrators to resolve."))
+    print(paste0("ActivityTypeCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the ActivityType WQX domain table. These data records are placed under the TADA.ActivityType.Flag: 'Not Reviewed'. Please contact TADA administrators to resolve."))
   }
 
   # populate flag column in data
@@ -564,8 +565,8 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
 #' This function removes rows where the result value is not numeric to
 #' prepare a dataframe for quantitative analyses. Ideally, this function should
 #' be run after other data cleaning, QA/QC, and harmonization steps are
-#' completed using other TADA package functions, or manually. Specifically, .
-#' this function removes rows with "Text","Coerced to NA", and "NA - Not Applicable"
+#' completed using other TADA package functions, or manually. Specifically, 
+#' this function removes rows with "Text" and "NA - Not Available"
 #' in the TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
 #' TADA.ResultMeasureValue column.
 #'
@@ -593,9 +594,9 @@ TADA_AutoFilter <- function(.data) {
     "ActivityTypeCode"
   ))
 
-  autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
+  autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
     TADA.ResultMeasureValueDataTypes.Flag != "Text" &
-    TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
+    TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" &
     !is.na(TADA.ResultMeasureValue)) # &
   # TADA.ActivityMediaName == "WATER")
 
@@ -740,7 +741,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
   }
 
   # rename ResultMeasureQualifier NA values to Pass in TADA.MeasureQualifierCode.Flag column
-  flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "NA - Not Applicable"
+  flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "NA - Not Available"
 
   # clean dataframe
   # if clean = FALSE, return full dataframe

diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R
@@ -1218,7 +1218,7 @@ TADA_FindPotentialDuplicatesSingleOrg <- function(.data) {
   # tack depth columns onto additional grouping columns
   colss <- c("OrganizationIdentifier", "MonitoringLocationIdentifier", "ActivityStartDate", "ActivityStartTime.Time", "ActivityTypeCode", "TADA.CharacteristicName", "SubjectTaxonomicName", "TADA.ResultSampleFractionText", "TADA.ResultMeasureValue", depthcols)
 
-  # find where the grouping using the columns above results in more than result identifier
+  # find where the grouping using the columns above results in more than one result identifier
   dups_sum_org <- .data %>%
     dplyr::group_by(dplyr::across(dplyr::any_of(colss))) %>%
     dplyr::summarise(numres = length(unique(ResultIdentifier))) %>%

diff --git a/R/Tables.R b/R/Tables.R
@@ -82,10 +82,6 @@ TADA_Stats <- function(.data, group_cols = c("TADA.ComparableDataIdentifier")) {
     .data <- TADA_IDCensoredData(.data)
   }
 
-  if (!"TADA.CensoredData.Flag" %in% names(.data)) {
-    .data <- TADA_IDCensoredData(.data)
-  }
-
   if ("TADA.NutrientSummation.Flag" %in% names(.data)) {
     print("Note: Your dataset contains TADA-generated total nutrient results, which have fewer columns populated with metadata. This might affect how groups are displayed in the stats table.")
   }