Skip to content

Commit

Permalink
fix pH unit harmonization
Browse files Browse the repository at this point in the history
  • Loading branch information
cristinamullin committed Jan 8, 2025
1 parent 7988b1b commit c2f5236
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 32 deletions.
14 changes: 0 additions & 14 deletions R/UnitConversions.R
Original file line number Diff line number Diff line change
Expand Up @@ -638,20 +638,6 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
# Remove unnecessary conversion columns
convert.data <- convert.data %>%
dplyr::select(-tidyselect::any_of(conversion.cols))


# Transform pH units to "STD UNITS"
if(any(convert.data$CharacteristicName == "pH")) {
print("TADA_ConvertResultUnits: harmonizing pH units to STD UNITS.")

convert.data <- convert.data %>%
dplyr::mutate(TADA.ResultMeasure.MeasureUnitCode = dplyr::case_when(
TADA.CharacteristicName == "PH" & TADA.ResultMeasure.MeasureUnitCode %in% NA ~ as.character("STD UNITS"),
TADA.CharacteristicName == "PH" & ResultMeasure.MeasureUnitCode %in% NA ~ as.character("STD UNITS"),
TADA.CharacteristicName == "PH" & ResultMeasure.MeasureUnitCode == "None" ~ as.character("STD UNITS"),
TADA.CharacteristicName == "PH" & ResultMeasure.MeasureUnitCode == "std units" ~ as.character("STD UNITS")
))
}

# Update ID and column ordering
convert.data <- TADA_CreateComparableID(convert.data)
Expand Down
29 changes: 27 additions & 2 deletions R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,23 @@ TADA_AutoClean <- function(.data) {
# Substitute updated characteristic name for deprecated names
print("TADA_Autoclean: updating deprecated (i.e. retired) characteristic names.")
.data <- TADA_SubstituteDeprecatedChars(.data)


# Harmonize pH units: set the TADA unit code to "STD UNITS" for pH results
# whose unit is "NONE", "None", "nu", or missing.
# `%in%` is used instead of `==` so that NA characteristic names or unit codes
# compare as FALSE rather than NA (an NA condition would make `if` error and
# would make `ifelse()` overwrite the existing unit with NA).
if ("PH" %in% .data$TADA.CharacteristicName) {
  print("TADA_AutoClean: harmonizing pH units to STD UNITS.")

  .data <- .data %>%
    dplyr::mutate(
      TADA.ResultMeasure.MeasureUnitCode = ifelse(
        # The unit tests are parenthesised on purpose: `&` binds tighter than
        # `|` in R, so without the parentheses the "PH" restriction would only
        # apply to the first unit test and every non-pH row with a
        # "None"/"nu"/missing unit would also be relabelled "STD UNITS".
        TADA.CharacteristicName %in% "PH" &
          (
            TADA.ResultMeasure.MeasureUnitCode %in% "NONE" |
              ResultMeasure.MeasureUnitCode %in% c("None", "nu") |
              is.na(ResultMeasure.MeasureUnitCode)
          ),
        "STD UNITS",
        TADA.ResultMeasure.MeasureUnitCode
      )
    )
}

# Implement unit harmonization
print("TADA_Autoclean: harmonizing result and depth units.")
.data <- suppressWarnings(TADA_ConvertResultUnits(.data, transform = TRUE, ref = "tada"))
Expand Down Expand Up @@ -718,7 +734,16 @@ TADA_SubstituteDeprecatedChars <- function(.data) {
#' @export
#'
TADA_CreateComparableID <- function(.data) {
  # Columns that are pasted together below to form the comparable data
  # identifier; all of them must be present in .data.
  expected_cols <- c(
    "TADA.CharacteristicName",
    "TADA.ResultSampleFractionText",
    "TADA.MethodSpeciationName",
    "TADA.ResultMeasure.MeasureUnitCode"
  )

  # Validate the input once (the same check was previously issued twice).
  # NOTE(review): TADA_CheckColumns is defined elsewhere; presumably it stops
  # with an error when an expected column is missing -- confirm.
  TADA_CheckColumns(.data, expected_cols = expected_cols)

  # Identifier format: characteristic_fraction_speciation_unit
  .data$TADA.ComparableDataIdentifier <- paste(
    .data$TADA.CharacteristicName,
    .data$TADA.ResultSampleFractionText,
    .data$TADA.MethodSpeciationName,
    .data$TADA.ResultMeasure.MeasureUnitCode,
    sep = "_"
  )
  return(.data)
}
Expand Down
Binary file modified data/Data_R5_TADAPackageDemo.rda
Binary file not shown.
39 changes: 23 additions & 16 deletions tests/testthat/test-Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,35 @@ test_that("TADA_AutoClean function does not grow dataset", {
expect_true(dim(testautoclean1)[1] == dim(testautoclean2)[1])
})

test_that("TADA_AutoClean: pH harmonization works as expected", {
  # Pull random, non-autocleaned data until it contains at least two pH
  # results. The number of attempts is capped so that a run of draws without
  # pH data cannot hang the test suite.
  max_attempts <- 10
  min_pH_rows <- 2
  random_data <- NULL
  random_pH_data <- NULL

  for (attempt in seq_len(max_attempts)) {
    random_data <- TADA_RandomTestingData(
      choose_random_state = FALSE,
      number_of_days = 1,
      autoclean = FALSE
    )
    random_pH_data <- dplyr::filter(random_data, CharacteristicName %in% "pH")

    if (nrow(random_pH_data) >= min_pH_rows) {
      break
    }
  }

  if (nrow(random_pH_data) < min_pH_rows) {
    skip("Could not retrieve enough random pH results to test harmonization.")
  }

  # TADA_AutoClean harmonizes pH, and other mod 1 required functions remove
  # garbage data
  cleaned <- TADA_AutoClean(random_data)
  censored <- TADA_SimpleCensoredMethods(cleaned)
  filtered <- TADA_AutoFilter(censored)
  flagged <- TADA_RunKeyFlagFunctions(filtered)
  harmonized <- TADA_HarmonizeSynonyms(flagged)

  # Is pH data harmonized after above mod 1 functions have run?
  harmonized_pH <- dplyr::filter(harmonized, CharacteristicName %in% "pH")
  pH_units <- unique(harmonized_pH$TADA.ResultMeasure.MeasureUnitCode)

  # Comparing the whole vector of unique units gives a clear failure message
  # when more than one unit (or NA) survives; `expect_true(unique(x) == ...)`
  # is not a scalar check in that case.
  expect_equal(pH_units, "STD UNITS")
})

0 comments on commit c2f5236

Please sign in to comment.