Skip to content

Commit

Permalink
Fix final 2024 res model export issues (#209)
Browse files Browse the repository at this point in the history
* Fix building-level unprorated predicted value calculation

It took me literally all day to figure out this single change for
multi-card prorated PINs

* Fix missing var and typos in comps report section

* Coerce n_years_exe to int to maintain Athena schema

* Set final 2024 res run for export

* Set final parameter values for 2024

* Update DR template notes and headings

* Swap card sheet column order

* Fix script ToC

* Skip recoding char_apts variable

* Adjust char_apts handling to match changes to ingest

* Replace NA handling for char_apts

* Update final run ID

* Fix char_apts display in workbooks
  • Loading branch information
dfsnow authored Feb 6, 2024
1 parent ed82769 commit 2a86b05
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 60 deletions.
Binary file modified misc/desk_review_template.xlsx
Binary file not shown.
10 changes: 5 additions & 5 deletions params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ run_type: "test"
# Note included with each run. Use this to summarize what changed about the run
# or add context
run_note: |
Cutover to 2024 model (Chicago reassessment) using 2023 data
Final 2024 residential model with updated data and new features
toggle:
# Should the train stage run full cross-validation? Otherwise, the model
Expand All @@ -25,10 +25,10 @@ toggle:

# Should SHAP values be calculated for this run in the interpret stage? Can be
# desirable to save time when testing many models
shap_enable: FALSE
shap_enable: TRUE

# Should comps be calculated for this run in the interpret stage?
comp_enable: FALSE
comp_enable: TRUE

# Upload all modeling artifacts and results to S3 in the upload stage. Set
# to FALSE if you are not a CCAO employee
Expand Down Expand Up @@ -444,5 +444,5 @@ comp:
# Final run ID(s) chosen for export to Desk Review spreadsheets and iasWorld
# upload
export:
triad_code: "3"
run_id: "2023-03-14-clever-damani"
triad_code: "1"
run_id: "2024-02-06-relaxed-tristan"
28 changes: 15 additions & 13 deletions pipeline/02-assess.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ assessment_card_data_mc <- assessment_card_data_pred %>%
# For prorated PINs with multiple cards, take the average of the card
# (building) across PINs. This is because the same prorated building spread
# across multiple PINs sometimes receives different values from the model
group_by(meta_tieback_key_pin, meta_card_num) %>%
group_by(meta_tieback_key_pin, meta_card_num, char_land_sf) %>%
mutate(
pred_card_intermediate_fmv = ifelse(
is.na(meta_tieback_key_pin),
Expand Down Expand Up @@ -208,18 +208,16 @@ message("Prorating buildings")
assessment_pin_data_prorated <- assessment_pin_data_w_land %>%
group_by(meta_tieback_key_pin) %>%
mutate(
tieback_total_land_fmv = ifelse(
# 1. Determine the mean, unprorated building value for buildings that span
# multiple PINs. This is the mean, across the linked PINs, of each PIN's
# predicted value minus its land value
pred_pin_final_fmv_bldg_no_prorate = ifelse(
is.na(meta_tieback_key_pin),
pred_pin_final_fmv_land,
sum(pred_pin_final_fmv_land)
pred_pin_final_fmv_round_no_prorate - pred_pin_final_fmv_land,
mean(pred_pin_final_fmv_round_no_prorate - pred_pin_final_fmv_land)
)
) %>%
ungroup() %>%
mutate(
# 1. Subtract the TOTAL value of the land of all linked PINs. This leaves
# only the value of the building that spans the PINs
pred_pin_final_fmv_bldg_no_prorate =
pred_pin_final_fmv_round_no_prorate - tieback_total_land_fmv,
# 2. Multiply the building by the proration rate of each PIN/card. This is
# the proportion of the building's value held by each PIN
pred_pin_final_fmv_bldg =
Expand Down Expand Up @@ -314,18 +312,22 @@ message("Saving card-level data")

# Keep only card-level variables of interest, including: ID variables (run_id,
# pin, card), characteristics, and predictions
char_vars <- params$model$predictor$all[
grepl("^char_", params$model$predictor$all)
]
char_vars <- char_vars[!char_vars %in% c("char_apts", "char_recent_renovation")]
assessment_card_data_merged %>%
select(
meta_year, meta_pin, meta_class, meta_card_num, meta_card_pct_total_fmv,
meta_complex_id, pred_card_initial_fmv, pred_card_final_fmv, char_class,
all_of(params$model$predictor$all), township_code
) %>%
mutate(meta_complex_id = as.numeric(meta_complex_id)) %>%
ccao::vars_recode(
starts_with("char_"),
type = "long",
as_factor = FALSE
mutate(
meta_complex_id = as.numeric(meta_complex_id),
ccao_n_years_exe_homeowner = as.integer(ccao_n_years_exe_homeowner),
char_apts = as.character(char_apts)
) %>%
ccao::vars_recode(any_of(char_vars), type = "long", as_factor = FALSE) %>%
write_parquet(paths$output$assessment_card$local)


Expand Down
51 changes: 18 additions & 33 deletions pipeline/07-export.R
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,12 @@ num_apts_by_pin <- assessment_card %>%
# to scan in a spreadsheet
mutate(
char_apts = case_when(
char_apts == "None" | is.na(char_apts) ~ "Missing",
char_apts == "Two" ~ "2",
char_apts == "Three" ~ "3",
char_apts == "Four" ~ "4",
char_apts == "Five" ~ "5",
char_apts == "Six" ~ "6",
char_apts == "NONE" | is.na(char_apts) ~ NA_character_,
char_apts == "TWO" ~ "2",
char_apts == "THREE" ~ "3",
char_apts == "FOUR" ~ "4",
char_apts == "FIVE" ~ "5",
char_apts == "SIX" ~ "6",
TRUE ~ "Missing"
)
) %>%
Expand Down Expand Up @@ -431,12 +431,6 @@ assessment_pin_prepped <- assessment_pin_merged %>%
property_full_address,
"[^[:alnum:]|' ',.-]"
),
# char_apts should only apply to 211s and 212s
char_apts = ifelse(
(meta_class != "211" & meta_class != "212"),
NA,
char_apts
),
# char_ncu should only apply to 212s
char_ncu = ifelse(meta_class != "212", NA, char_ncu)
)
Expand Down Expand Up @@ -464,21 +458,13 @@ assessment_card_prepped <- assessment_card %>%
# Make sure the format of char_apts matches the short format we used to
# generate assessment_pin_prepped
char_apts = case_when(
char_apts == "None" | is.na(char_apts) ~ "Missing",
char_apts == "Two" ~ "2",
char_apts == "Three" ~ "3",
char_apts == "Four" ~ "4",
char_apts == "Five" ~ "5",
char_apts == "Six" ~ "6",
char_apts == "NONE" | is.na(char_apts) ~ NA_character_,
char_apts == "TWO" ~ "2",
char_apts == "THREE" ~ "3",
char_apts == "FOUR" ~ "4",
char_apts == "FIVE" ~ "5",
char_apts == "SIX" ~ "6",
TRUE ~ "Missing"
)
) %>%
mutate(
# char_apts should only apply to 211s and 212s
char_apts = ifelse(
(meta_class != "211" & meta_class != "212"),
NA,
char_apts
),
# char_ncu should only apply to 212s
char_ncu = ifelse(meta_class != "212", NA, char_ncu)
Expand Down Expand Up @@ -508,7 +494,7 @@ for (town in unique(assessment_pin_prepped$township_code)) {
filter(township_code == town) %>%
select(-township_code)

# 5.3 Comp details -----------------------------------------------------------
## 5.1 Comp details ----------------------------------------------------------

# Filter the training data so that we only display sales that are referenced.
# First, get the indexes of every sale whose comp is referenced in
Expand All @@ -525,7 +511,7 @@ for (town in unique(assessment_pin_prepped$township_code)) {
training_data_selected <- training_data_filtered %>%
select(
meta_pin, meta_sale_price, meta_sale_date, meta_class, meta_nbhd_code,
char_yrblt, loc_property_address, char_beds, char_ext_wall,
loc_property_address, char_yrblt, char_beds, char_ext_wall,
char_heat, char_bldg_sf, char_type_resd, char_land_sf
) %>%
ccao::vars_recode(type = "long")
Expand Down Expand Up @@ -568,7 +554,7 @@ for (town in unique(assessment_pin_prepped$township_code)) {
startCol = 1, startRow = 5, colNames = FALSE
)

## 5.2. PIN-Level ------------------------------------------------------------
# 5.2. PIN-Level -------------------------------------------------------------

# Update PIN-level data to link to comps detail sheet
training_data_ids <- training_data_filtered %>%
Expand Down Expand Up @@ -859,10 +845,9 @@ upload_data <- assessment_pin %>%
ungroup() %>%
select(
township_code,
PARID = meta_pin, CARD = meta_card_num,
USER37 = pred_card_final_fmv_no_prorate,
USER24 = meta_tieback_proration_rate.x,
OVRRCNLD = pred_card_final_fmv
PARID = meta_pin,
CARD = meta_card_num,
MV = pred_card_final_fmv_no_prorate
)


Expand Down
2 changes: 1 addition & 1 deletion reports/_setup.qmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
params:
run_id: "2024-01-29-boring-carly"
run_id: "2024-02-06-relaxed-tristan"
year: "2024"
---

Expand Down
11 changes: 4 additions & 7 deletions reports/performance/_comp.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ homogeneous properties.
```{r _comp_neighborhood_means_coding}
base_data <- assessment_card %>%
mutate(triad = ccao::town_get_triad(as.character(township_code))) %>%
select(
meta_pin, meta_nbhd_code, meta_class, triad,
meta_card_num, meta_modeling_group
) %>%
select(meta_pin, meta_nbhd_code, meta_class, triad, meta_card_num) %>%
left_join(comp_df, by = c("meta_pin" = "pin", "meta_card_num" = "card"))
generate_plot <- function(df) {
Expand Down Expand Up @@ -60,10 +57,10 @@ plots$"Overall" <- base_data %>%
filter(triad == run_triad_code) %>%
generate_plot()
plots$"Single-Family" <- base_data %>%
filter(meta_modeling_group == "SF", triad == run_triad_code) %>%
filter(!(meta_class %in% c("211", "212")), triad == run_triad_code) %>%
generate_plot()
plots$"Multi-Family" <- base_data %>%
filter(meta_modeling_group == "MF", triad == run_triad_code) %>%
filter(meta_class %in% c("211", "212"), triad == run_triad_code) %>%
generate_plot()
```

Expand Down Expand Up @@ -227,7 +224,7 @@ leaflet() %>%
"<hr>",
"Certified FMV: ",
scales::dollar(meta_certified_tot, accuracy = 1),
"<br>Inital Card FMV: ",
"<br>Initial Card FMV: ",
scales::dollar(pred_card_initial_fmv, accuracy = 1),
"<hr>",
"Sale Date: ", meta_sale_date,
Expand Down
2 changes: 1 addition & 1 deletion reports/performance/performance.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ knitr:
out.width: "100%"
editor: source
params:
run_id: "2024-01-29-boring-carly"
run_id: "2024-02-06-relaxed-tristan"
year: "2024"
---

Expand Down

0 comments on commit 2a86b05

Please sign in to comment.