From 8050822a63fde8116181fa0817880d42e51492c6 Mon Sep 17 00:00:00 2001 From: Dan Snow <31494343+dfsnow@users.noreply.github.com> Date: Fri, 20 Dec 2024 13:52:21 -0600 Subject: [PATCH] Fix missing `geography_id` value for triads (#299) * Fix missing geography_id value for triads * Fix styler issue --- pipeline/03-evaluate.R | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/pipeline/03-evaluate.R b/pipeline/03-evaluate.R index 8c0480bf..7f72cb5c 100644 --- a/pipeline/03-evaluate.R +++ b/pipeline/03-evaluate.R @@ -61,6 +61,23 @@ assessment_data_pin <- read_parquet(paths$output$assessment_pin$local) %>% pred_pin_final_fmv_round, sale_ratio_study_price ) +# Helper function to return NA when sale sample size is too small +gte_n <- \(n_sales, min_n, fn, na_type) { + if (sum(!is.na(n_sales)) >= min_n) { + return(fn) + } else { + return(na_type) + } +} + +# Helper function to add triad code as geography ID if it's not already present +add_triad_code <- \(data) { + if (!"geography_id" %in% colnames(data)) { + data$geography_id <- data$triad_code + } + return(data) +} + @@ -73,15 +90,6 @@ assessment_data_pin <- read_parquet(paths$output$assessment_pin$local) %>% gen_agg_stats <- function(data, truth, estimate, bldg_sqft, rsn_col, rsf_col, triad, geography, class, col_dict, min_n) { - # Helper function to return NA when sale sample size is too small - gte_n <- \(n_sales, min_n, fn, na_type) { - if (sum(!is.na(n_sales)) >= min_n) { - return(fn) - } else { - return(na_type) - } - } - # List of summary stat/performance functions applied within summarize() below # Each function is listed on the right while the name of the function is on # the left @@ -239,7 +247,8 @@ gen_agg_stats <- function(data, truth, estimate, bldg_sqft, mutate(across( -(contains("_max") & contains("yoy")) & where(is.numeric), ~ replace(.x, !is.finite(.x), NA) - )) + )) %>% + add_triad_code() } @@ -299,7 +308,8 @@ gen_agg_stats_quantile <- function(data, truth, estimate, mutate(across( -(contains("_max") & contains("yoy")) & where(is.numeric), ~ replace(.x, !is.finite(.x), NA) - )) + )) %>% + add_triad_code() }