From d199d76ca4ce450287cc56d590d8f7745c86896e Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 9 Jan 2025 18:24:15 +0000 Subject: [PATCH 1/6] Update params to match residential model --- params.yaml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/params.yaml b/params.yaml index 641f137..e90fa88 100644 --- a/params.yaml +++ b/params.yaml @@ -168,7 +168,6 @@ model: - "prox_num_pin_in_half_mile" - "prox_num_bus_stop_in_half_mile" - "prox_num_foreclosure_per_1000_pin_past_5_years" - - "prox_num_school_in_half_mile" - "prox_airport_dnl_total" - "prox_nearest_bike_trail_dist_ft" - "prox_nearest_cemetery_dist_ft" @@ -176,20 +175,25 @@ model: - "prox_nearest_cta_stop_dist_ft" - "prox_nearest_hospital_dist_ft" - "prox_lake_michigan_dist_ft" - - "prox_nearest_major_road_dist_ft" - "prox_nearest_metra_route_dist_ft" - "prox_nearest_metra_stop_dist_ft" - "prox_nearest_park_dist_ft" - "prox_nearest_railroad_dist_ft" - - "prox_nearest_secondary_road_dist_ft" - "prox_nearest_university_dist_ft" - "prox_nearest_vacant_land_dist_ft" - "prox_nearest_water_dist_ft" - "prox_nearest_golf_course_dist_ft" + - "prox_nearest_road_highway_dist_ft" + - "prox_nearest_road_arterial_dist_ft" + - "prox_nearest_road_collector_dist_ft" + - "prox_nearest_road_highway_daily_traffic" + - "prox_nearest_road_arterial_daily_traffic" + - "prox_nearest_road_collector_daily_traffic" + - "prox_nearest_new_construction_dist_ft" + - "prox_nearest_stadium_dist_ft" - "acs5_percent_age_children" - "acs5_percent_age_senior" - "acs5_median_age_total" - - "acs5_percent_mobility_moved_from_other_state" - "acs5_percent_household_family_married" - "acs5_percent_household_nonfamily_alone" - "acs5_percent_education_high_school" @@ -203,11 +207,8 @@ model: - "acs5_median_household_total_occupied_year_built" - "acs5_median_household_renter_occupied_gross_rent" - "acs5_percent_household_owner_occupied" - - "acs5_percent_household_total_occupied_w_sel_cond" - - 
"acs5_percent_mobility_moved_in_county" - "other_tax_bill_rate" - "ccao_is_active_exe_homeowner" - - "ccao_is_corner_lot" - "ccao_n_years_exe_homeowner" - "time_sale_year" - "time_sale_day" @@ -217,6 +218,12 @@ model: - "time_sale_day_of_month" - "time_sale_day_of_week" - "time_sale_post_covid" + - "shp_parcel_centroid_dist_ft_sd" + - "shp_parcel_edge_len_ft_sd" + - "shp_parcel_interior_angle_sd" + - "shp_parcel_mrr_area_ratio" + - "shp_parcel_mrr_side_ratio" + - "shp_parcel_num_vertices" - "meta_strata_1" - "meta_strata_2" From efa177573f0c9e3615bdf5a89f8129a0a9be7aac Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 9 Jan 2025 18:24:33 +0000 Subject: [PATCH 2/6] Hide echoed code in model reports --- reports/_setup.qmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/reports/_setup.qmd b/reports/_setup.qmd index e977e6d..0336f68 100644 --- a/reports/_setup.qmd +++ b/reports/_setup.qmd @@ -1,4 +1,6 @@ --- +execute: + echo: FALSE params: run_id: "2024-02-08-dreamy-sam" year: "2024" From 1c8d5ebeac1bc8bb96bee50e620d7df257fdf5c0 Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 9 Jan 2025 18:24:52 +0000 Subject: [PATCH 3/6] Plot variance wrt time --- reports/performance/_model.qmd | 129 +++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index d62cda1..ff061de 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1036,3 +1036,132 @@ model_big_misses_assessment %>% ``` ::: + +## Variance Over Time + +These plots show trends in the variance of sale price and estimated FMV. Ideally, the model's estimates should have the same variance as the true values (sales) with respect to time. 
+ +::: {.panel-tabset} + +```{r _model_organize_variance_data} +training_data_monthly <- training_data_pred %>% + filter(!ind_pin_is_multicard, !sv_is_outlier) %>% + mutate( + meta_sale_date = as.Date(meta_sale_date), + year = year(meta_sale_date), + month = month(meta_sale_date), + difference = (pred_card_initial_fmv - meta_sale_price), + squared_difference = difference^2 + ) %>% + group_by(year, month) %>% + summarize( + total_sales = sum(meta_sale_price), + total_fmv = sum(pred_card_initial_fmv), + variance_sale = var(meta_sale_price), + variance_fmv = var(pred_card_initial_fmv), + mean_difference = mean(difference), + sse = sum(squared_difference), + n = n(), + .groups = "drop" + ) %>% + mutate( + variance_diff = variance_fmv - variance_sale, + date = make_date(year, month), + variance_ratio = variance_fmv / variance_sale, + percent_sales = n / sum(n) * 100, + percent_sse = sse / sum(sse) * 100 + ) +training_data_monthly_long <- training_data_monthly %>% + pivot_longer( + cols = c( + variance_sale, variance_fmv, percent_sales, + percent_sse, variance_diff + ), + names_to = "Metric", + values_to = "Value" + ) +``` + +### Variance Ratio (FMV / Sale Price) + +```{r _model_variance_ratio_chart} +ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + + geom_line() + + geom_point() + + labs( + x = "Date", + y = "Variance Ratio" + ) + + theme_minimal() +``` + +### Total FMV and Sale Price Variance + +```{r _model_overall_variance_chart} +ggplot( + training_data_monthly_long %>% filter(Metric %in% + c("variance_sale", "variance_fmv")), + aes(x = date, y = Value, color = Metric) +) + + geom_line() + + geom_point() + + geom_smooth(method = "loess", se = FALSE) + + labs( + x = "Month", + y = "Variance", + color = "Metric" + ) + + scale_color_discrete( + labels = c( + "variance_sale" = "Variance of Sale Price", + "variance_fmv" = "Variance of FMV" + ) + ) + + scale_y_continuous(labels = function(x) { + scales::label_scientific()(x) %>% + paste0("$", .) 
+ }) + + theme_minimal() +``` + +### Variance Difference (Sale Price - FMV) + +```{r _model_variance_diff_chart} +ggplot(training_data_monthly, aes(x = date, y = variance_sale - variance_fmv)) + + geom_line() + + geom_point() + + geom_smooth(method = "loess", se = FALSE) + + labs( + x = "Date", + y = "Difference in Variance" + ) + + scale_y_continuous(labels = function(x) { + scales::label_scientific()(x) %>% + paste0("$", .) + }) + + theme_minimal() +``` + +### Distribution of Sales and SSE + +```{r _model_distribution_sales_sse_chart} +ggplot(training_data_monthly, aes(x = date)) + + geom_bar(aes(y = percent_sales, fill = "Sales"), + stat = "identity", position = "identity", alpha = 0.5 + ) + + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), + stat = "identity", position = "identity", alpha = 0.5 + ) + + scale_fill_manual( + values = c("Sales" = "#00BFC4", "Sum of Square Errors" = "#F8766D") + ) + + labs( + x = "Date", + y = "Normalized Scale", + fill = "", + ) + + theme_minimal() + + theme(legend.position = "bottom") +``` + +::: From 7e32eff94840e7ccc0e6d663bd930b394f3ca7e9 Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 9 Jan 2025 18:43:41 +0000 Subject: [PATCH 4/6] Update training data --- dvc.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dvc.lock b/dvc.lock index 62d58f4..8fcc692 100644 --- a/dvc.lock +++ b/dvc.lock @@ -5,17 +5,17 @@ stages: deps: - path: pipeline/00-ingest.R hash: md5 - md5: 29292ee2bef109914c423c9259aa8879 - size: 22847 + md5: 816b28ff1c68d17a9082d0dc839a85c0 + size: 22844 params: params.yaml: assessment: year: '2024' date: '2024-01-01' - triad: city + triad: north group: condo data_year: '2023' - working_year: '2024' + working_year: '2025' input: min_sale_year: '2015' max_sale_year: '2023' @@ -31,24 +31,24 @@ stages: outs: - path: input/assessment_data.parquet hash: md5 - md5: b49601e8a812659026c7358d84f5e16b - size: 85702121 + md5: 1acef7f3c22353411bc15a03d7493164 + 
size: 85643154 - path: input/char_data.parquet hash: md5 - md5: d1a30dd51db2985be57548c1498f2533 - size: 160972976 + md5: 5be564143ebae5a67e8f44eb93d839dd + size: 159013932 - path: input/condo_strata_data.parquet hash: md5 - md5: 8fe86e0af29431ecb021f101f79789ee - size: 40481 + md5: b5a85462a7f4de94916b228be45ccd75 + size: 40543 - path: input/land_nbhd_rate_data.parquet hash: md5 md5: f3ec9627322bd271bf2957b7388aaa34 size: 3873 - path: input/training_data.parquet hash: md5 - md5: 9b2510ac885e4fc77928661a012d8821 - size: 79812730 + md5: e818848026f6dc6e3d6af9b8d6b34641 + size: 79923460 train: cmd: Rscript pipeline/01-train.R deps: From ae3d20cb22e51e32f961e5b7cea43453d3fb8bcd Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 9 Jan 2025 19:15:08 +0000 Subject: [PATCH 5/6] Remove multi-card filter --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index ff061de..baeeaae 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1045,7 +1045,7 @@ These plot shows show trends in the variance of sale price and estimated FMV. 
Id ```{r _model_organize_variance_data} training_data_monthly <- training_data_pred %>% - filter(!ind_pin_is_multicard, !sv_is_outlier) %>% + filter(!sv_is_outlier) %>% mutate( meta_sale_date = as.Date(meta_sale_date), year = year(meta_sale_date), From 6af9b011aec71ef6b3ea594c451c25dd02d40661 Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Fri, 10 Jan 2025 15:39:33 +0000 Subject: [PATCH 6/6] Update DVC lockfile --- dvc.lock | 337 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 194 insertions(+), 143 deletions(-) diff --git a/dvc.lock b/dvc.lock index 8fcc692..78d2332 100644 --- a/dvc.lock +++ b/dvc.lock @@ -31,12 +31,12 @@ stages: outs: - path: input/assessment_data.parquet hash: md5 - md5: 1acef7f3c22353411bc15a03d7493164 - size: 85643154 + md5: 9a13f7248f1d80079be339ed1d995088 + size: 86228842 - path: input/char_data.parquet hash: md5 - md5: 5be564143ebae5a67e8f44eb93d839dd - size: 159013932 + md5: 23b25c36873492d884125a3c8ee2dfbb + size: 160028159 - path: input/condo_strata_data.parquet hash: md5 md5: b5a85462a7f4de94916b228be45ccd75 @@ -54,8 +54,12 @@ stages: deps: - path: input/training_data.parquet hash: md5 - md5: 51090aa4f5b5311b1441e62b81fd3827 - size: 68987740 + md5: e818848026f6dc6e3d6af9b8d6b34641 + size: 79923460 + - path: pipeline/01-train.R + hash: md5 + md5: 3cdf7f4f1dc9eb8056b7a133685d7d74 + size: 17278 params: params.yaml: cv: @@ -165,7 +169,6 @@ stages: - prox_num_pin_in_half_mile - prox_num_bus_stop_in_half_mile - prox_num_foreclosure_per_1000_pin_past_5_years - - prox_num_school_in_half_mile - prox_airport_dnl_total - prox_nearest_bike_trail_dist_ft - prox_nearest_cemetery_dist_ft @@ -173,20 +176,25 @@ stages: - prox_nearest_cta_stop_dist_ft - prox_nearest_hospital_dist_ft - prox_lake_michigan_dist_ft - - prox_nearest_major_road_dist_ft - prox_nearest_metra_route_dist_ft - prox_nearest_metra_stop_dist_ft - prox_nearest_park_dist_ft - prox_nearest_railroad_dist_ft - - prox_nearest_secondary_road_dist_ft - 
prox_nearest_university_dist_ft - prox_nearest_vacant_land_dist_ft - prox_nearest_water_dist_ft - prox_nearest_golf_course_dist_ft + - prox_nearest_road_highway_dist_ft + - prox_nearest_road_arterial_dist_ft + - prox_nearest_road_collector_dist_ft + - prox_nearest_road_highway_daily_traffic + - prox_nearest_road_arterial_daily_traffic + - prox_nearest_road_collector_daily_traffic + - prox_nearest_new_construction_dist_ft + - prox_nearest_stadium_dist_ft - acs5_percent_age_children - acs5_percent_age_senior - acs5_median_age_total - - acs5_percent_mobility_moved_from_other_state - acs5_percent_household_family_married - acs5_percent_household_nonfamily_alone - acs5_percent_education_high_school @@ -200,11 +208,8 @@ stages: - acs5_median_household_total_occupied_year_built - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - - acs5_percent_household_total_occupied_w_sel_cond - - acs5_percent_mobility_moved_in_county - other_tax_bill_rate - ccao_is_active_exe_homeowner - - ccao_is_corner_lot - ccao_n_years_exe_homeowner - time_sale_year - time_sale_day @@ -214,6 +219,12 @@ stages: - time_sale_day_of_month - time_sale_day_of_week - time_sale_post_covid + - shp_parcel_centroid_dist_ft_sd + - shp_parcel_edge_len_ft_sd + - shp_parcel_interior_angle_sd + - shp_parcel_mrr_area_ratio + - shp_parcel_mrr_side_ratio + - shp_parcel_num_vertices - meta_strata_1 - meta_strata_2 categorical: @@ -269,68 +280,72 @@ stages: outs: - path: output/intermediate/timing/model_timing_train.parquet hash: md5 - md5: 0b5c189c84736f99942b1aabe5582870 - size: 2879 + md5: 49705f359b1ebcefbacd574e8127f414 + size: 2494 - path: output/parameter_final/model_parameter_final.parquet hash: md5 - md5: b234a91486b487642e8738306f87c25c - size: 8857 + md5: f4058c1bc4e6ad85bb39979386a7925e + size: 6658 - path: output/parameter_range/model_parameter_range.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 - path: 
output/parameter_search/model_parameter_search.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 - path: output/test_card/model_test_card.parquet hash: md5 - md5: e95956454d04a68669f04f5355af3b5e - size: 1342825 + md5: 26a22ab188ade56ab5f626f67bd3ba81 + size: 1363025 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 5a607521588c3aca5761150390082127 - size: 15244546 + md5: 8e02d33bae096b58a2030e3df67a204f + size: 12548762 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: c672f98b0b68e5a16adb0b687b43adca - size: 4199953 + md5: e1efaad85652cdfcce1f09ca9e17a8b2 + size: 4298090 assess: cmd: Rscript pipeline/02-assess.R deps: - path: input/assessment_data.parquet hash: md5 - md5: 605ee612ff45dca2edf5c508993a7f56 - size: 69522635 + md5: 9a13f7248f1d80079be339ed1d995088 + size: 86228842 - path: input/condo_strata_data.parquet hash: md5 - md5: 0a7462f0afccb09bdd94c58148a3ca8d - size: 40842 + md5: b5a85462a7f4de94916b228be45ccd75 + size: 40543 - path: input/land_nbhd_rate_data.parquet hash: md5 - md5: e508daf5790982c303d6503fe1cb8e2b - size: 4413 + md5: f3ec9627322bd271bf2957b7388aaa34 + size: 3873 - path: input/training_data.parquet hash: md5 - md5: 51090aa4f5b5311b1441e62b81fd3827 - size: 68987740 + md5: e818848026f6dc6e3d6af9b8d6b34641 + size: 79923460 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 5a607521588c3aca5761150390082127 - size: 15244546 + md5: 8e02d33bae096b58a2030e3df67a204f + size: 12548762 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: c672f98b0b68e5a16adb0b687b43adca - size: 4199953 + md5: e1efaad85652cdfcce1f09ca9e17a8b2 + size: 4298090 + - path: pipeline/02-assess.R + hash: md5 + md5: 82b43cd8084454f1712d6fc859a93e2e + size: 18054 params: params.yaml: assessment: year: '2024' date: '2024-01-01' - triad: city + triad: north group: condo data_year: '2023' - working_year: '2024' + working_year: '2025' 
model.predictor.all: - meta_township_code - meta_nbhd_code @@ -357,7 +372,6 @@ stages: - prox_num_pin_in_half_mile - prox_num_bus_stop_in_half_mile - prox_num_foreclosure_per_1000_pin_past_5_years - - prox_num_school_in_half_mile - prox_airport_dnl_total - prox_nearest_bike_trail_dist_ft - prox_nearest_cemetery_dist_ft @@ -365,20 +379,25 @@ stages: - prox_nearest_cta_stop_dist_ft - prox_nearest_hospital_dist_ft - prox_lake_michigan_dist_ft - - prox_nearest_major_road_dist_ft - prox_nearest_metra_route_dist_ft - prox_nearest_metra_stop_dist_ft - prox_nearest_park_dist_ft - prox_nearest_railroad_dist_ft - - prox_nearest_secondary_road_dist_ft - prox_nearest_university_dist_ft - prox_nearest_vacant_land_dist_ft - prox_nearest_water_dist_ft - prox_nearest_golf_course_dist_ft + - prox_nearest_road_highway_dist_ft + - prox_nearest_road_arterial_dist_ft + - prox_nearest_road_collector_dist_ft + - prox_nearest_road_highway_daily_traffic + - prox_nearest_road_arterial_daily_traffic + - prox_nearest_road_collector_daily_traffic + - prox_nearest_new_construction_dist_ft + - prox_nearest_stadium_dist_ft - acs5_percent_age_children - acs5_percent_age_senior - acs5_median_age_total - - acs5_percent_mobility_moved_from_other_state - acs5_percent_household_family_married - acs5_percent_household_nonfamily_alone - acs5_percent_education_high_school @@ -392,11 +411,8 @@ stages: - acs5_median_household_total_occupied_year_built - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - - acs5_percent_household_total_occupied_w_sel_cond - - acs5_percent_mobility_moved_in_county - other_tax_bill_rate - ccao_is_active_exe_homeowner - - ccao_is_corner_lot - ccao_n_years_exe_homeowner - time_sale_year - time_sale_day @@ -406,6 +422,12 @@ stages: - time_sale_day_of_month - time_sale_day_of_week - time_sale_post_covid + - shp_parcel_centroid_dist_ft_sd + - shp_parcel_edge_len_ft_sd + - shp_parcel_interior_angle_sd + - shp_parcel_mrr_area_ratio + - 
shp_parcel_mrr_side_ratio + - shp_parcel_num_vertices - meta_strata_1 - meta_strata_2 pv: @@ -443,36 +465,40 @@ stages: outs: - path: output/assessment_card/model_assessment_card.parquet hash: md5 - md5: 3442b0b0fb25364caba810a507213109 - size: 38822670 + md5: 4419f4c0f8173670f7a4bc102d9762a0 + size: 46969952 - path: output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: ae6242ed4427ccd87acab2d87435ab8f - size: 41641680 + md5: 38a950b7f3607c93e25866bcc67c694a + size: 42501219 - path: output/intermediate/timing/model_timing_assess.parquet hash: md5 - md5: 6e16f8a8ecb256d0555e05258630cc29 - size: 2886 + md5: b659d42d1fdfc238577e1fc52e96e8f6 + size: 2499 evaluate: cmd: Rscript pipeline/03-evaluate.R deps: - path: output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: ae6242ed4427ccd87acab2d87435ab8f - size: 41641680 + md5: 38a950b7f3607c93e25866bcc67c694a + size: 42501219 - path: output/test_card/model_test_card.parquet hash: md5 - md5: e95956454d04a68669f04f5355af3b5e - size: 1342825 + md5: 26a22ab188ade56ab5f626f67bd3ba81 + size: 1363025 + - path: pipeline/03-evaluate.R + hash: md5 + md5: ff504eb22892ae0908bbaaf4e76da4f4 + size: 17443 params: params.yaml: assessment: year: '2024' date: '2024-01-01' - triad: city + triad: north group: condo data_year: '2023' - working_year: '2024' + working_year: '2025' ratio_study: far_year: '2021' far_stage: board @@ -498,39 +524,43 @@ stages: outs: - path: output/intermediate/timing/model_timing_evaluate.parquet hash: md5 - md5: a6ba362bf2c50b27aae7bb688e4c2b68 - size: 2900 + md5: 917fb8fdf8a1530807d02bd5f002dd04 + size: 2509 - path: output/performance/model_performance_assessment.parquet hash: md5 - md5: 6c43dfc44d5e8186f037b5c6d7bbd8b1 - size: 573773 + md5: 8acce8518f3349354d42065069709a73 + size: 284963 - path: output/performance/model_performance_test.parquet hash: md5 - md5: 9867d9222eb5ff618f69b185ffc7452c - size: 1060602 + md5: 496718f9356631595e331ed8e6d4fc46 + size: 1029262 - path: 
output/performance_quantile/model_performance_quantile_assessment.parquet hash: md5 - md5: 8fb50ba32609879ad5fc9b196e07bdae - size: 461742 + md5: e710c2fc8488cce52d078b833b0cb9ad + size: 220661 - path: output/performance_quantile/model_performance_quantile_test.parquet hash: md5 - md5: 5d5b3e0c69fab782974f89c4bbbf75fb - size: 1055715 + md5: b8df5bae8482a12fc2ab84cfa273cbe4 + size: 1032173 interpret: cmd: Rscript pipeline/04-interpret.R deps: - path: input/assessment_data.parquet hash: md5 - md5: 605ee612ff45dca2edf5c508993a7f56 - size: 69522635 + md5: 9a13f7248f1d80079be339ed1d995088 + size: 86228842 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 5a607521588c3aca5761150390082127 - size: 15244546 + md5: 8e02d33bae096b58a2030e3df67a204f + size: 12548762 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: c672f98b0b68e5a16adb0b687b43adca - size: 4199953 + md5: e1efaad85652cdfcce1f09ca9e17a8b2 + size: 4298090 + - path: pipeline/04-interpret.R + hash: md5 + md5: 51795fcf45dabc142f57c7b6e524b74b + size: 4194 params: params.yaml: model.predictor.all: @@ -559,7 +589,6 @@ stages: - prox_num_pin_in_half_mile - prox_num_bus_stop_in_half_mile - prox_num_foreclosure_per_1000_pin_past_5_years - - prox_num_school_in_half_mile - prox_airport_dnl_total - prox_nearest_bike_trail_dist_ft - prox_nearest_cemetery_dist_ft @@ -567,20 +596,25 @@ stages: - prox_nearest_cta_stop_dist_ft - prox_nearest_hospital_dist_ft - prox_lake_michigan_dist_ft - - prox_nearest_major_road_dist_ft - prox_nearest_metra_route_dist_ft - prox_nearest_metra_stop_dist_ft - prox_nearest_park_dist_ft - prox_nearest_railroad_dist_ft - - prox_nearest_secondary_road_dist_ft - prox_nearest_university_dist_ft - prox_nearest_vacant_land_dist_ft - prox_nearest_water_dist_ft - prox_nearest_golf_course_dist_ft + - prox_nearest_road_highway_dist_ft + - prox_nearest_road_arterial_dist_ft + - prox_nearest_road_collector_dist_ft + - prox_nearest_road_highway_daily_traffic + - 
prox_nearest_road_arterial_daily_traffic + - prox_nearest_road_collector_daily_traffic + - prox_nearest_new_construction_dist_ft + - prox_nearest_stadium_dist_ft - acs5_percent_age_children - acs5_percent_age_senior - acs5_median_age_total - - acs5_percent_mobility_moved_from_other_state - acs5_percent_household_family_married - acs5_percent_household_nonfamily_alone - acs5_percent_education_high_school @@ -594,11 +628,8 @@ stages: - acs5_median_household_total_occupied_year_built - acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - - acs5_percent_household_total_occupied_w_sel_cond - - acs5_percent_mobility_moved_in_county - other_tax_bill_rate - ccao_is_active_exe_homeowner - - ccao_is_corner_lot - ccao_n_years_exe_homeowner - time_sale_year - time_sale_day @@ -608,41 +639,51 @@ stages: - time_sale_day_of_month - time_sale_day_of_week - time_sale_post_covid + - shp_parcel_centroid_dist_ft_sd + - shp_parcel_edge_len_ft_sd + - shp_parcel_interior_angle_sd + - shp_parcel_mrr_area_ratio + - shp_parcel_mrr_side_ratio + - shp_parcel_num_vertices - meta_strata_1 - meta_strata_2 toggle.shap_enable: false outs: - path: output/feature_importance/model_feature_importance.parquet hash: md5 - md5: 61db6f11d2ea7aa53d6990445b5d9cd2 - size: 8582 + md5: d13ab8e795062aae622105e6da62571d + size: 8033 - path: output/intermediate/timing/model_timing_interpret.parquet hash: md5 - md5: 906ad56aba8f66c9a0b32c5ed9b2e5a7 - size: 2914 + md5: 42bd17beb63fcd4e4b39517ee4558b4f + size: 2519 - path: output/shap/model_shap.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 finalize: cmd: Rscript pipeline/05-finalize.R deps: - path: output/intermediate/timing/model_timing_assess.parquet hash: md5 - md5: 6e16f8a8ecb256d0555e05258630cc29 - size: 2886 + md5: b659d42d1fdfc238577e1fc52e96e8f6 + size: 2499 - path: output/intermediate/timing/model_timing_evaluate.parquet hash: md5 - md5: 
a6ba362bf2c50b27aae7bb688e4c2b68 - size: 2900 + md5: 917fb8fdf8a1530807d02bd5f002dd04 + size: 2509 - path: output/intermediate/timing/model_timing_interpret.parquet hash: md5 - md5: 906ad56aba8f66c9a0b32c5ed9b2e5a7 - size: 2914 + md5: 42bd17beb63fcd4e4b39517ee4558b4f + size: 2519 - path: output/intermediate/timing/model_timing_train.parquet hash: md5 - md5: 0b5c189c84736f99942b1aabe5582870 - size: 2879 + md5: 49705f359b1ebcefbacd574e8127f414 + size: 2494 + - path: pipeline/05-finalize.R + hash: md5 + md5: df815760b41cedc8e41132262d2977c7 + size: 8074 params: params.yaml: cv: @@ -700,7 +741,6 @@ stages: - prox_num_pin_in_half_mile - prox_num_bus_stop_in_half_mile - prox_num_foreclosure_per_1000_pin_past_5_years - - prox_num_school_in_half_mile - prox_airport_dnl_total - prox_nearest_bike_trail_dist_ft - prox_nearest_cemetery_dist_ft @@ -708,20 +748,25 @@ stages: - prox_nearest_cta_stop_dist_ft - prox_nearest_hospital_dist_ft - prox_lake_michigan_dist_ft - - prox_nearest_major_road_dist_ft - prox_nearest_metra_route_dist_ft - prox_nearest_metra_stop_dist_ft - prox_nearest_park_dist_ft - prox_nearest_railroad_dist_ft - - prox_nearest_secondary_road_dist_ft - prox_nearest_university_dist_ft - prox_nearest_vacant_land_dist_ft - prox_nearest_water_dist_ft - prox_nearest_golf_course_dist_ft + - prox_nearest_road_highway_dist_ft + - prox_nearest_road_arterial_dist_ft + - prox_nearest_road_collector_dist_ft + - prox_nearest_road_highway_daily_traffic + - prox_nearest_road_arterial_daily_traffic + - prox_nearest_road_collector_daily_traffic + - prox_nearest_new_construction_dist_ft + - prox_nearest_stadium_dist_ft - acs5_percent_age_children - acs5_percent_age_senior - acs5_median_age_total - - acs5_percent_mobility_moved_from_other_state - acs5_percent_household_family_married - acs5_percent_household_nonfamily_alone - acs5_percent_education_high_school @@ -735,11 +780,8 @@ stages: - acs5_median_household_total_occupied_year_built - 
acs5_median_household_renter_occupied_gross_rent - acs5_percent_household_owner_occupied - - acs5_percent_household_total_occupied_w_sel_cond - - acs5_percent_mobility_moved_in_county - other_tax_bill_rate - ccao_is_active_exe_homeowner - - ccao_is_corner_lot - ccao_n_years_exe_homeowner - time_sale_year - time_sale_day @@ -749,6 +791,12 @@ stages: - time_sale_day_of_month - time_sale_day_of_week - time_sale_post_covid + - shp_parcel_centroid_dist_ft_sd + - shp_parcel_edge_len_ft_sd + - shp_parcel_interior_angle_sd + - shp_parcel_mrr_area_ratio + - shp_parcel_mrr_side_ratio + - shp_parcel_num_vertices - meta_strata_1 - meta_strata_2 categorical: @@ -877,29 +925,28 @@ stages: - loc_school_elementary_district_geoid - loc_school_secondary_district_geoid - loc_school_unified_district_geoid - run_note: "Test run for updated 2024 model pipeline. Remove CCAO collected\ - \ characteristics.\n" + run_note: Preparing for 2025 model with 2024 data toggle: cv_enable: false shap_enable: false - upload_enable: true + upload_enable: false outs: - path: output/intermediate/timing/model_timing_finalize.parquet hash: md5 - md5: 172ddb18b1c2e7f4593187f9d3f13069 - size: 2893 + md5: 9d916ce0f8a5786e820c41c57bd1e4bc + size: 2514 - path: output/metadata/model_metadata.parquet hash: md5 - md5: 5bfe8e50f50463253a3f8f4fa3164bb8 - size: 29757 + md5: cf3e0d3290ab71603f5a051b9cb96daa + size: 21974 - path: output/timing/model_timing.parquet hash: md5 - md5: 736810f7363817b6023d98b1e74d05af - size: 6032 + md5: a5997ef8c4c50e1d1d3b1f445697d4f7 + size: 5118 - path: reports/performance/performance.html hash: md5 - md5: 004b653e50e9513fc04ad1fc1d5ca544 - size: 80 + md5: 6cc1c436907d481f2f1f3278cb583c2b + size: 21440143 export: cmd: Rscript pipeline/07-export.R params: @@ -937,69 +984,73 @@ stages: deps: - path: output/assessment_card/model_assessment_card.parquet hash: md5 - md5: 3442b0b0fb25364caba810a507213109 - size: 38822670 + md5: 4419f4c0f8173670f7a4bc102d9762a0 + size: 46969952 - path: 
output/assessment_pin/model_assessment_pin.parquet hash: md5 - md5: ae6242ed4427ccd87acab2d87435ab8f - size: 41641680 + md5: 38a950b7f3607c93e25866bcc67c694a + size: 42501219 - path: output/feature_importance/model_feature_importance.parquet hash: md5 - md5: 61db6f11d2ea7aa53d6990445b5d9cd2 - size: 8582 + md5: d13ab8e795062aae622105e6da62571d + size: 8033 - path: output/metadata/model_metadata.parquet hash: md5 - md5: 5bfe8e50f50463253a3f8f4fa3164bb8 - size: 29757 + md5: cf3e0d3290ab71603f5a051b9cb96daa + size: 21974 - path: output/parameter_final/model_parameter_final.parquet hash: md5 - md5: b234a91486b487642e8738306f87c25c - size: 8857 + md5: f4058c1bc4e6ad85bb39979386a7925e + size: 6658 - path: output/parameter_range/model_parameter_range.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 - path: output/parameter_search/model_parameter_search.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 - path: output/performance/model_performance_assessment.parquet hash: md5 - md5: 6c43dfc44d5e8186f037b5c6d7bbd8b1 - size: 573773 + md5: 8acce8518f3349354d42065069709a73 + size: 284963 - path: output/performance/model_performance_test.parquet hash: md5 - md5: 9867d9222eb5ff618f69b185ffc7452c - size: 1060602 + md5: 496718f9356631595e331ed8e6d4fc46 + size: 1029262 - path: output/performance_quantile/model_performance_quantile_assessment.parquet hash: md5 - md5: 8fb50ba32609879ad5fc9b196e07bdae - size: 461742 + md5: e710c2fc8488cce52d078b833b0cb9ad + size: 220661 - path: output/performance_quantile/model_performance_quantile_test.parquet hash: md5 - md5: 5d5b3e0c69fab782974f89c4bbbf75fb - size: 1055715 + md5: b8df5bae8482a12fc2ab84cfa273cbe4 + size: 1032173 - path: output/shap/model_shap.parquet hash: md5 - md5: 150000269b5873fa1b3eaeeff7887ce2 + md5: a47965c8cbafb84368f2a21a047bc7f2 size: 501 - path: output/test_card/model_test_card.parquet hash: md5 - md5: 
e95956454d04a68669f04f5355af3b5e - size: 1342825 + md5: 26a22ab188ade56ab5f626f67bd3ba81 + size: 1363025 - path: output/timing/model_timing.parquet hash: md5 - md5: 736810f7363817b6023d98b1e74d05af - size: 6032 + md5: a5997ef8c4c50e1d1d3b1f445697d4f7 + size: 5118 - path: output/workflow/fit/model_workflow_fit.zip hash: md5 - md5: 5a607521588c3aca5761150390082127 - size: 15244546 + md5: 8e02d33bae096b58a2030e3df67a204f + size: 12548762 - path: output/workflow/recipe/model_workflow_recipe.rds hash: md5 - md5: c672f98b0b68e5a16adb0b687b43adca - size: 4199953 + md5: e1efaad85652cdfcce1f09ca9e17a8b2 + size: 4298090 + - path: pipeline/06-upload.R + hash: md5 + md5: 613632039c6744d3132a8760c1b51099 + size: 10855 - path: reports/performance/performance.html hash: md5 - md5: 004b653e50e9513fc04ad1fc1d5ca544 - size: 80 + md5: 6cc1c436907d481f2f1f3278cb583c2b + size: 21440143