From 3c8d52cb01208756c136fe50f5bc4e2b5646347d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 20 Dec 2024 21:44:04 +0000 Subject: [PATCH 01/23] Initial push --- reports/_setup.qmd | 2 +- reports/performance/_stats.qmd | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/reports/_setup.qmd b/reports/_setup.qmd index 569c43b5..9314d4db 100644 --- a/reports/_setup.qmd +++ b/reports/_setup.qmd @@ -1,6 +1,6 @@ --- params: - run_id: "2024-03-17-stupefied-maya" + run_id: "2024-12-19-wonderful-yuxin" year: "2024" --- diff --git a/reports/performance/_stats.qmd b/reports/performance/_stats.qmd index ebc1934d..bdba2356 100644 --- a/reports/performance/_stats.qmd +++ b/reports/performance/_stats.qmd @@ -55,10 +55,10 @@ stats_sf_parcels <- chars_data %>% ) ) %>% select(sale, all_of(stats_regression_vars)) %>% - vars_recode( - code_type = "long", - dictionary = ccao::vars_dict_legacy - ) %>% + # vars_recode( + # code_type = "long", + # dictionary = ccao::vars_dict_legacy + # ) %>% vars_rename(names_from = "athena", names_to = "pretty") %>% rename_with(~ gsub("loc_", "", .x)) %>% rename_with(~ gsub("_", " ", .x)) %>% @@ -385,3 +385,8 @@ stats_median_yoy_delta %>% c("Comparison of YOY Change in AV for Sold and Unsold Houses" = 7) ) ``` + +```{r} + +``` + From 110c9dfb2c31bf025ae7624b96f7a4c105450a1b Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 15:40:57 +0000 Subject: [PATCH 02/23] Include Variance over Time --- reports/performance/_model.qmd | 121 +++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 5fc293fe..334ee8c8 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1258,3 +1258,124 @@ model_big_misses_assessment %>% ``` ::: + +## Variance Over Time + +::: {.panel-tabset} + + +```{r, include = FALSE} +training_data_monthly <- training_data_pred %>% + mutate( + meta_sale_date = as.Date(meta_sale_date), + year = year(meta_sale_date), + month = month(meta_sale_date), + difference = (pred_card_initial_fmv - meta_sale_price), + squared_difference = difference^2 + ) %>% + filter(abs(difference) < 100000) %>% + group_by(year, month) %>% + summarize( + total_sales = sum(meta_sale_price), + total_fmv = sum(pred_card_initial_fmv), + variance_sale = var(meta_sale_price), + variance_fmv = var(pred_card_initial_fmv), + mean_difference = mean(difference), + sse = sum(squared_difference), # Sum of Square Error + n = n(), + .groups = "drop" + ) %>% + mutate( + variance_sale = variance_sale, + variance_fmv = variance_fmv, + total_sales_overall = sum(n), + total_sse_overall = sum(sse), + percent_sales = total_sales_overall / n * 100, + percent_sse = sse / total_sse_overall * 100, + variance_diff = variance_fmv - variance_sale, + date = make_date(year, month), + variance_ratio = variance_fmv / variance_sale, + percent_sales = n / sum(n) * 100, + percent_sse = sse / sum(sse) * 100 + ) + +training_data_monthly_long <- training_data_monthly %>% + pivot_longer( + cols = c(variance_sale, variance_fmv, percent_sales, percent_sse, variance_diff), + names_to = "Metric", + values_to = "Value" + ) + +``` + +### Variance Ratio + +```{r} +ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + + geom_line() + + geom_point() + + labs( + x = "Date", + y = "Variance Ratio" + ) + + theme_minimal() + +``` +### Total $ Variance of Sale Price and FMV +```{r} +ggplot(training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), + aes(x = date, y = Value, color = Metric)) + + geom_line() + + geom_point() + + geom_smooth(method = "loess", se = FALSE) + + labs( + x = "Month", + y = "Variance", + color = "Metric" + ) + + scale_color_discrete( + labels = c("variance_sale" = "Variance of Sale Price", + "variance_fmv" = "Variance of FMV") + ) + + scale_y_continuous(labels = function(x) { + scales::label_scientific()(x) %>% + paste0("$", .) + }) + + theme_minimal() +``` +### Difference Between Variance of FMV and Sale Price + +```{r} +ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + + geom_line() + + geom_point() + + geom_smooth(se = FALSE) + + labs( + x = "Month", + y = "Difference in Variance ($)" + ) + + scale_y_continuous(labels = function(x) { + scales::label_scientific()(x) %>% + paste0("$", .) + }) + + theme_minimal() + + +``` +### Distribution of Sales and SSE + +```{r} +ggplot(training_data_monthly, aes(x = date)) + + geom_bar(aes(y = percent_sales, fill = "Sales"), + stat = "identity", position = "identity", alpha = 0.5) + + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), + stat = "identity", position = "identity", alpha = 0.5) + + labs( + x = "Month", + y = "Normalized Scale" + ) + + theme_minimal() + + theme(legend.position = "bottom") + +``` +::: From d32fb77a0a08d5219a9b1e357848ab81df59edc4 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 15:45:12 +0000 Subject: [PATCH 03/23] remove unnecessary values --- reports/performance/_model.qmd | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 334ee8c8..7aea7838 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1265,7 +1265,7 @@ model_big_misses_assessment %>% ```{r, include = FALSE} -training_data_monthly <- training_data_pred %>% +training_data_monthlytraining_data_monthly <- training_data_pred %>% mutate( meta_sale_date = as.Date(meta_sale_date), year = year(meta_sale_date), @@ -1286,12 +1286,6 @@ training_data_monthly <- training_data_pred %>% .groups = "drop" ) %>% mutate( - variance_sale = variance_sale, - variance_fmv = variance_fmv, - total_sales_overall = sum(n), - total_sse_overall = sum(sse), - percent_sales = total_sales_overall / n * 100, - percent_sse = sse / total_sse_overall * 100, variance_diff = variance_fmv - variance_sale, date = make_date(year, month), variance_ratio = variance_fmv / variance_sale, From d50b544618b21c1289fa0dc65abfa189ba07c823 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 16:05:32 +0000 Subject: [PATCH 04/23] Rename data --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 7aea7838..0040f7e5 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1265,7 +1265,7 @@ model_big_misses_assessment %>% ```{r, include = FALSE} -training_data_monthlytraining_data_monthly <- training_data_pred %>% +training_data_monthly <- training_data_pred %>% mutate( meta_sale_date = as.Date(meta_sale_date), year = year(meta_sale_date), From 5e20b85795763e1d98926af51df0c6f1a8e970cc Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 16:31:11 +0000 Subject: [PATCH 05/23] Fix filtering --- reports/performance/_model.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 0040f7e5..efd9f32d 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1266,6 +1266,7 @@ model_big_misses_assessment %>% ```{r, include = FALSE} training_data_monthly <- training_data_pred %>% + filter(!ind_pin_is_multicard, !sv_is_outlier) %>% mutate( meta_sale_date = as.Date(meta_sale_date), year = year(meta_sale_date), @@ -1273,7 +1274,6 @@ training_data_monthly <- training_data_pred %>% difference = (pred_card_initial_fmv - meta_sale_price), squared_difference = difference^2 ) %>% - filter(abs(difference) < 100000) %>% group_by(year, month) %>% summarize( total_sales = sum(meta_sale_price), @@ -1281,7 +1281,7 @@ training_data_monthly <- training_data_pred %>% variance_sale = var(meta_sale_price), variance_fmv = var(pred_card_initial_fmv), mean_difference = mean(difference), - sse = sum(squared_difference), # Sum of Square Error + sse = sum(squared_difference), n = n(), .groups = "drop" ) %>% From cdd542eb53c2c3ed9522de1985c30e687ba569e3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 17:12:55 +0000 Subject: [PATCH 06/23] pre commit --- reports/performance/_model.qmd | 31 ++++++++++++++++--------------- reports/performance/_stats.qmd | 1 - 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index efd9f32d..1acc74b9 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1299,7 +1299,6 @@ training_data_monthly_long <- training_data_monthly %>% names_to = "Metric", values_to = "Value" ) - ``` ### Variance Ratio @@ -1313,12 +1312,13 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + y = "Variance Ratio" ) + theme_minimal() - ``` ### Total $ Variance of Sale Price and FMV ```{r} -ggplot(training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), - aes(x = date, y = Value, color = Metric)) + +ggplot( + training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), + aes(x = date, y = Value, color = Metric) +) + geom_line() + geom_point() + geom_smooth(method = "loess", se = FALSE) + @@ -1328,11 +1328,13 @@ ggplot(training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "var color = "Metric" ) + scale_color_discrete( - labels = c("variance_sale" = "Variance of Sale Price", - "variance_fmv" = "Variance of FMV") + labels = c( + "variance_sale" = "Variance of Sale Price", + "variance_fmv" = "Variance of FMV" + ) ) + scale_y_continuous(labels = function(x) { - scales::label_scientific()(x) %>% + scales::label_scientific()(x) %>% paste0("$", .) }) + theme_minimal() @@ -1349,27 +1351,26 @@ ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + y = "Difference in Variance ($)" ) + scale_y_continuous(labels = function(x) { - scales::label_scientific()(x) %>% + scales::label_scientific()(x) %>% paste0("$", .) }) + theme_minimal() - - ``` ### Distribution of Sales and SSE ```{r} ggplot(training_data_monthly, aes(x = date)) + - geom_bar(aes(y = percent_sales, fill = "Sales"), - stat = "identity", position = "identity", alpha = 0.5) + - geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), - stat = "identity", position = "identity", alpha = 0.5) + + geom_bar(aes(y = percent_sales, fill = "Sales"), + stat = "identity", position = "identity", alpha = 0.5 + ) + + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), + stat = "identity", position = "identity", alpha = 0.5 + ) + labs( x = "Month", y = "Normalized Scale" ) + theme_minimal() + theme(legend.position = "bottom") - ``` ::: diff --git a/reports/performance/_stats.qmd b/reports/performance/_stats.qmd index bdba2356..92c9ef4b 100644 --- a/reports/performance/_stats.qmd +++ b/reports/performance/_stats.qmd @@ -387,6 +387,5 @@ stats_median_yoy_delta %>% ``` ```{r} - ``` From 526466ccdae27577375c210e19e03069526b4a6d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 17:16:40 +0000 Subject: [PATCH 07/23] Remove stats changes --- reports/performance/_stats.qmd | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/reports/performance/_stats.qmd b/reports/performance/_stats.qmd index 92c9ef4b..ebc1934d 100644 --- a/reports/performance/_stats.qmd +++ b/reports/performance/_stats.qmd @@ -55,10 +55,10 @@ stats_sf_parcels <- chars_data %>% ) ) %>% select(sale, all_of(stats_regression_vars)) %>% - # vars_recode( - # code_type = "long", - # dictionary = ccao::vars_dict_legacy - # ) %>% + vars_recode( + code_type = "long", + dictionary = ccao::vars_dict_legacy + ) %>% vars_rename(names_from = "athena", names_to = "pretty") %>% rename_with(~ gsub("loc_", "", .x)) %>% rename_with(~ gsub("_", " ", .x)) %>% @@ -385,7 +385,3 @@ stats_median_yoy_delta %>% c("Comparison of YOY Change in AV for Sold and Unsold Houses" = 7) ) ``` - -```{r} -``` - From 74017d2e914ad8cb4d8288672be4c0d832d0c267 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 18:40:34 +0000 Subject: [PATCH 08/23] Add loess --- reports/performance/_model.qmd | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 1acc74b9..8f460793 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1261,6 +1261,8 @@ model_big_misses_assessment %>% ## Variance Over Time +These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios for variance may stay stable, because housing values tend to increase over time, the aggregate difference may increase. + ::: {.panel-tabset} @@ -1301,7 +1303,7 @@ training_data_monthly_long <- training_data_monthly %>% ) ``` -### Variance Ratio +### Variance Ratio (FMV / Sale Price) ```{r} ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + @@ -1313,7 +1315,8 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + ) + theme_minimal() ``` -### Total $ Variance of Sale Price and FMV + +### Comparison Between Total FMV and Sale Price Variance ```{r} ggplot( training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), @@ -1339,7 +1342,9 @@ ggplot( }) + theme_minimal() ``` -### Difference Between Variance of FMV and Sale Price + + +### $ Difference abs(FMV - Sale Price) ```{r} ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + @@ -1356,21 +1361,22 @@ ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + }) + theme_minimal() ``` + ### Distribution of Sales and SSE ```{r} ggplot(training_data_monthly, aes(x = date)) + geom_bar(aes(y = percent_sales, fill = "Sales"), - stat = "identity", position = "identity", alpha = 0.5 - ) + + stat = "identity", position = "identity", alpha = 0.5) + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), - stat = "identity", position = "identity", alpha = 0.5 - ) + + stat = "identity", position = "identity", alpha = 0.5) + labs( x = "Month", - y = "Normalized Scale" + y = "Normalized Scale", + fill = "", ) + theme_minimal() + theme(legend.position = "bottom") + ``` ::: From 938ae1a7ceecff78120ed4303bdcea50758d2629 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 26 Dec 2024 18:43:48 +0000 Subject: [PATCH 09/23] precommit --- reports/performance/_model.qmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 8f460793..b71b80e3 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1367,9 +1367,11 @@ ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + ```{r} ggplot(training_data_monthly, aes(x = date)) + geom_bar(aes(y = percent_sales, fill = "Sales"), - stat = "identity", position = "identity", alpha = 0.5) + + stat = "identity", position = "identity", alpha = 0.5 + ) + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), - stat = "identity", position = "identity", alpha = 0.5) + + stat = "identity", position = "identity", alpha = 0.5 + ) + labs( x = "Month", y = "Normalized Scale", @@ -1377,6 +1379,5 @@ ggplot(training_data_monthly, aes(x = date)) + ) + theme_minimal() + theme(legend.position = "bottom") - ``` ::: From e0aad79ff3cd72d22abdfe794332e3667db8be27 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 27 Dec 2024 15:29:36 +0000 Subject: [PATCH 10/23] Remove Comparison --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index b71b80e3..a67bc286 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1316,7 +1316,7 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + theme_minimal() ``` -### Comparison Between Total FMV and Sale Price Variance +### Total FMV and Sale Price Variance ```{r} ggplot( training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), From d2b3c58cbc3c5f27bed76de86de4e8e8d5c1f63a Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 12:48:49 -0600 Subject: [PATCH 11/23] Update _setup.qmd --- reports/_setup.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/_setup.qmd b/reports/_setup.qmd index 9314d4db..71b43e1f 100644 --- a/reports/_setup.qmd +++ b/reports/_setup.qmd @@ -1,7 +1,7 @@ --- params: run_id: "2024-12-19-wonderful-yuxin" - year: "2024" + year: "2025" --- ```{r} From 086693575facb15eddbcc9720062fe13047529fc Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 12:49:54 -0600 Subject: [PATCH 12/23] Update _model.qmd --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index a67bc286..60be057e 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1261,7 +1261,7 @@ model_big_misses_assessment %>% ## Variance Over Time -These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios for variance may stay stable, because housing values tend to increase over time, the aggregate difference may increase. +These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios may demonstrate consistent trends, but because housing values tend to increase over time, the aggregate difference may increase. ::: {.panel-tabset} From bccb2050e0c7f97035beff2efcc0f6c6337bcd6a Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 12:50:33 -0600 Subject: [PATCH 13/23] Update _model.qmd --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 60be057e..fe782d44 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1261,7 +1261,7 @@ model_big_misses_assessment %>% ## Variance Over Time -These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios may demonstrate consistent trends, but because housing values tend to increase over time, the aggregate difference may increase. +These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios may demonstrate consistent trends, because housing values tend to increase over time, the aggregate difference may increase. ::: {.panel-tabset} From 5fa5ad39fb459d513318cdef253585c19417c98f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 27 Dec 2024 19:13:14 +0000 Subject: [PATCH 14/23] add names --- reports/performance/_model.qmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index a67bc286..ef68fdeb 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1266,7 +1266,7 @@ These charts look at the variance of the sale price and estimated FMV over time. ::: {.panel-tabset} -```{r, include = FALSE} +```{r _organize_variance_data} training_data_monthly <- training_data_pred %>% filter(!ind_pin_is_multicard, !sv_is_outlier) %>% mutate( @@ -1305,7 +1305,7 @@ training_data_monthly_long <- training_data_monthly %>% ### Variance Ratio (FMV / Sale Price) -```{r} +```{r _variance_ratio_chart} ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + geom_line() + geom_point() + @@ -1317,7 +1317,7 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + ``` ### Total FMV and Sale Price Variance -```{r} +```{r _overall_variance_chart} ggplot( training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), aes(x = date, y = Value, color = Metric) @@ -1346,7 +1346,7 @@ ggplot( ### $ Difference abs(FMV - Sale Price) -```{r} +```{r _variance_diff_chart} ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + geom_line() + geom_point() + @@ -1364,7 +1364,7 @@ ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + ### Distribution of Sales and SSE -```{r} +```{r _distribution_sales_sse_chart} ggplot(training_data_monthly, aes(x = date)) + geom_bar(aes(y = percent_sales, fill = "Sales"), stat = "identity", position = "identity", alpha = 0.5 From 9652e6a06a479ba8bbb8865754b397a94cd7c521 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 13:29:28 -0600 Subject: [PATCH 15/23] Remove $ --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 0f7efb62..7ef0b6e9 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1353,7 +1353,7 @@ ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + geom_smooth(se = FALSE) + labs( x = "Month", - y = "Difference in Variance ($)" + y = "Difference in Variance" ) + scale_y_continuous(labels = function(x) { scales::label_scientific()(x) %>% From 88201f33ad1fbbbb9530c6eec777dd42a5a779fe Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 27 Dec 2024 19:39:58 +0000 Subject: [PATCH 16/23] Remove absolute --- reports/performance/_model.qmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 7ef0b6e9..255418ae 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1344,13 +1344,13 @@ ggplot( ``` -### $ Difference abs(FMV - Sale Price) +### Variance Difference (Sale Price - FMV) ```{r _variance_diff_chart} -ggplot(training_data_monthly, aes(x = date, y = abs(variance_diff))) + +ggplot(training_data_monthly, aes(x = date, y = variance_sale - variance_fmv)) + geom_line() + geom_point() + - geom_smooth(se = FALSE) + + geom_smooth(method = "loess", se = FALSE) + labs( x = "Month", y = "Difference in Variance" From c5a62a44271d077fe82fa4d4ac719882e0718a63 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 18:25:28 -0600 Subject: [PATCH 17/23] Update reports/performance/_model.qmd Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- reports/performance/_model.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 255418ae..536cfae5 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1265,7 +1265,6 @@ These charts look at the variance of the sale price and estimated FMV over time. ::: {.panel-tabset} - ```{r _organize_variance_data} training_data_monthly <- training_data_pred %>% filter(!ind_pin_is_multicard, !sv_is_outlier) %>% From 1850dc15a171e01734e5b76268ab0e39e9a94b59 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 18:25:38 -0600 Subject: [PATCH 18/23] Update reports/performance/_model.qmd Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 536cfae5..5eb4ed02 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1261,7 +1261,7 @@ model_big_misses_assessment %>% ## Variance Over Time -These charts look at the variance of the sale price and estimated FMV over time. Ideally, these should remain stable. One should be aware that while ratios may demonstrate consistent trends, because housing values tend to increase over time, the aggregate difference may increase. +These plot shows show trends in the variance of sale price and estimated FMV. Ideally, the model's estimates should have the same variance as the true values (sales) with respect to time. ::: {.panel-tabset} From 08469e3782d10674d766012d5b7f5a4aa9636473 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 18:25:46 -0600 Subject: [PATCH 19/23] Update reports/performance/_model.qmd Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- reports/performance/_model.qmd | 1 + 1 file changed, 1 insertion(+) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 5eb4ed02..b909cc29 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1316,6 +1316,7 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + ``` ### Total FMV and Sale Price Variance + ```{r _overall_variance_chart} ggplot( training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), From cbdfc599e7be8015779987e63d1dac736e224bd7 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 27 Dec 2024 18:26:00 -0600 Subject: [PATCH 20/23] Update reports/performance/_model.qmd Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- reports/performance/_model.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index b909cc29..408dcac1 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1265,7 +1265,7 @@ These plot shows show trends in the variance of sale price and estimated FMV. Id ::: {.panel-tabset} -```{r _organize_variance_data} +```{r _model_organize_variance_data} training_data_monthly <- training_data_pred %>% filter(!ind_pin_is_multicard, !sv_is_outlier) %>% mutate( From 5d4b0a8c503bb32bf54f8491c8442cce21e46f86 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 2 Jan 2025 00:29:39 +0000 Subject: [PATCH 21/23] wrapup --- reports/performance/_model.qmd | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 408dcac1..3219546d 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1296,7 +1296,10 @@ training_data_monthly <- training_data_pred %>% training_data_monthly_long <- training_data_monthly %>% pivot_longer( - cols = c(variance_sale, variance_fmv, percent_sales, percent_sse, variance_diff), + cols = c( + variance_sale, variance_fmv, percent_sales, + percent_sse, variance_diff + ), names_to = "Metric", values_to = "Value" ) @@ -1304,7 +1307,7 @@ training_data_monthly_long <- training_data_monthly %>% ### Variance Ratio (FMV / Sale Price) -```{r _variance_ratio_chart} +```{r _model_variance_ratio_chart} ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + geom_line() + geom_point() + @@ -1317,9 +1320,10 @@ ggplot(training_data_monthly, aes(x = date, y = variance_ratio)) + ### Total FMV and Sale Price Variance -```{r _overall_variance_chart} +```{r _model_overall_variance_chart} ggplot( - training_data_monthly_long %>% filter(Metric %in% c("variance_sale", "variance_fmv")), + training_data_monthly_long %>% filter(Metric %in% + c("variance_sale", "variance_fmv")), aes(x = date, y = Value, color = Metric) ) + geom_line() + @@ -1346,13 +1350,13 @@ ggplot( ### Variance Difference (Sale Price - FMV) -```{r _variance_diff_chart} +```{r _model_variance_diff_chart} ggplot(training_data_monthly, aes(x = date, y = variance_sale - variance_fmv)) + geom_line() + geom_point() + geom_smooth(method = "loess", se = FALSE) + labs( - x = "Month", + x = "Date", y = "Difference in Variance" ) + scale_y_continuous(labels = function(x) { @@ -1364,7 +1368,7 @@ ggplot(training_data_monthly, aes(x = date, y = variance_sale - variance_fmv)) + ### Distribution of Sales and SSE -```{r _distribution_sales_sse_chart} +```{r _model_distribution_sales_sse_chart} ggplot(training_data_monthly, aes(x = date)) + geom_bar(aes(y = percent_sales, fill = "Sales"), stat = "identity", position = "identity", alpha = 0.5 @@ -1372,8 +1376,11 @@ ggplot(training_data_monthly, aes(x = date)) + geom_bar(aes(y = percent_sse, fill = "Sum of Square Errors"), stat = "identity", position = "identity", alpha = 0.5 ) + + scale_fill_manual( + values = c("Sales" = "#00BFC4", "Sum of Square Errors" = "#F8766D") + ) + labs( - x = "Month", + x = "Date", y = "Normalized Scale", fill = "", ) + From eaac4e7a43b1dd1e798cc10d43edf066b83898ed Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 2 Jan 2025 20:54:50 +0000 Subject: [PATCH 22/23] Revert run_id change --- reports/_setup.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reports/_setup.qmd b/reports/_setup.qmd index 71b43e1f..569c43b5 100644 --- a/reports/_setup.qmd +++ b/reports/_setup.qmd @@ -1,7 +1,7 @@ --- params: - run_id: "2024-12-19-wonderful-yuxin" - year: "2025" + run_id: "2024-03-17-stupefied-maya" + year: "2024" --- ```{r} From 796855da002acd5a3ec7ffd09337363ccca6697a Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Thu, 2 Jan 2025 20:56:09 +0000 Subject: [PATCH 23/23] Remove extraneous space --- reports/performance/_model.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/reports/performance/_model.qmd b/reports/performance/_model.qmd index 3219546d..68f3f284 100644 --- a/reports/performance/_model.qmd +++ b/reports/performance/_model.qmd @@ -1347,7 +1347,6 @@ ggplot( theme_minimal() ``` - ### Variance Difference (Sale Price - FMV) ```{r _model_variance_diff_chart}