From 192e2a9fbc98e3d3d6aab0aa895b7766feddf0a1 Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Sun, 20 Aug 2023 13:19:25 -0600 Subject: [PATCH 01/11] Ref #75 include mae in lm results table --- chapter_2_lm.qmd | 42 ++++++++-------- scripts/lm_preparation.R | 106 +++++++++++++++++++++++---------------- 2 files changed, 85 insertions(+), 63 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index d5d0a18..7a5ffc6 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -658,12 +658,12 @@ source("scripts/lm_preparation.R") # Monthly table vis_site_glance_montly %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_monthly, all_sites_all_vis_glance_monthly, all_vis_glance_monthly) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% + values_from = c(r.squared, mae, rmse)) %>% gt() %>% tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% @@ -674,19 +674,19 @@ vis_site_glance_montly %>% .list = list( "site" = "Site", "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", + "mae_EVI" = "MAE", "rmse_EVI" = "RMSE", "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", + "mae_NDVI" = "MAE", "rmse_NDVI" = "RMSE", "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", + "mae_NIRv" = "MAE", "rmse_NIRv" = "RMSE", "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", + "mae_CCI" = "MAE", "rmse_CCI" = "RMSE", "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", + "mae_All" = "MAE", "rmse_All" = "RMSE" ) ) %>% @@ -705,12 +705,12 @@ vis_site_glance_montly %>% # Weekly table vis_site_glance_weekly %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_weekly, all_sites_all_vis_glance_weekly, all_vis_glance_weekly) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% + values_from = c(r.squared, mae, rmse)) %>% gt() %>% tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% @@ -721,19 +721,19 @@ vis_site_glance_weekly %>% .list = list( "site" = "Site", "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", + "mae_EVI" = "MAE", "rmse_EVI" = "RMSE", "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", + "mae_NDVI" = "MAE", "rmse_NDVI" = "RMSE", "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", + "mae_NIRv" = "MAE", "rmse_NIRv" = "RMSE", "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", + "mae_CCI" = "MAE", "rmse_CCI" = "RMSE", "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", + "mae_All" = "MAE", "rmse_All" = "RMSE" ) ) %>% @@ -752,12 +752,12 @@ vis_site_glance_weekly %>% # Daily table vis_site_glance_daily %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_daily, all_sites_all_vis_glance_daily, all_vis_glance_daily) %>% pivot_wider(names_from = index, - values_from = c(r.squared, adj.r.squared, rmse)) %>% + values_from = c(r.squared, mae, rmse)) %>% gt() %>% tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% @@ -768,19 +768,19 @@ vis_site_glance_daily %>% .list = list( "site" = "Site", "r.squared_EVI" = "R2", - "adj.r.squared_EVI" = "adj R2", + "mae_EVI" = "MAE", "rmse_EVI" = "RMSE", "r.squared_NDVI" = "R2", - "adj.r.squared_NDVI" = "adj R2", + "mae_NDVI" = "MAE", "rmse_NDVI" = "RMSE", "r.squared_NIRv" = "R2", - "adj.r.squared_NIRv" = "adj R2", + "mae_NIRv" = "MAE", "rmse_NIRv" = "RMSE", "r.squared_CCI" = "R2", - "adj.r.squared_CCI" = "adj R2", + "mae_CCI" = "MAE", "rmse_CCI" = "RMSE", "r.squared_All" = "R2", - "adj.r.squared_All" = "adj R2", + "mae_All" = "MAE", "rmse_All" = "RMSE" ) ) %>% diff --git a/scripts/lm_preparation.R b/scripts/lm_preparation.R index ebf0703..8c22e78 100644 --- a/scripts/lm_preparation.R +++ b/scripts/lm_preparation.R @@ -10,6 +10,7 @@ library(dplyr) library(broom) library(tidymodels) +library(purrr) source("scripts/models_data_preparation.R") @@ -50,12 +51,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_monthly <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -81,7 +83,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_monthly <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -103,7 +106,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_monthly <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -125,7 +129,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_monthly <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -147,16 +152,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_monthly <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") vis_site_glance_montly <- bind_rows(evi_glance_monthly, - ndvi_glance_monthly, - nirv_glance_monthly, - cci_glance_monthly) + ndvi_glance_monthly, + nirv_glance_monthly, + cci_glance_monthly) # # Linear model for all sites kNDVI @@ -202,12 +208,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_monthly <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -224,16 +231,17 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_monthly <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) # WEEKLY LMS ------------------------------------------------------------------ # Prepare data with all sites @@ -273,12 +281,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_weekly <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -304,7 +313,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_weekly <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -326,7 +336,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_weekly <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -348,7 +359,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_weekly <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -370,16 +382,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_weekly <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") vis_site_glance_weekly <- bind_rows(evi_glance_weekly, - ndvi_glance_weekly, - nirv_glance_weekly, - cci_glance_weekly) + ndvi_glance_weekly, + nirv_glance_weekly, + cci_glance_weekly) # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% @@ -401,12 +414,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_weekly <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -424,16 +438,17 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_weekly <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) # DAILY LMS ------------------------------------------------------------------ @@ -473,12 +488,13 @@ all_fit <- all_sites_lm %>% mutate(site = "All") all_sites_glance_daily <- all_sites_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(site = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -504,7 +520,8 @@ evi_fit <- evi_lm %>% mutate(index = "EVI") evi_glance_daily <- evi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -526,7 +543,8 @@ ndvi_fit <- ndvi_lm %>% mutate(index = "NDVI") ndvi_glance_daily <- ndvi_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -548,7 +566,8 @@ nirv_fit <- nirv_lm %>% mutate(index = "NIRv") nirv_glance_daily <- nirv_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied, -augmented) %>% arrange(desc(r.squared)) %>% @@ -570,16 +589,17 @@ cci_fit <- cci_lm %>% mutate(index = "CCI") cci_glance_daily <- cci_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") vis_site_glance_daily <- bind_rows(evi_glance_daily, - ndvi_glance_daily, - nirv_glance_daily, - cci_glance_daily) + ndvi_glance_daily, + nirv_glance_daily, + cci_glance_daily) # Linear model for all VI's [C | All VIs] all_vis_lm <- ind_sites %>% @@ -601,12 +621,13 @@ all_fit <- all_vis_lm %>% mutate(site = "All") all_vis_glance_daily <- all_vis_lm %>% - mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2))), + mae = map_dbl(augmented, ~mean(abs(.x$.resid)))) %>% unnest(glanced) %>% select(-data, -fit, -tidied) %>% arrange(desc(r.squared)) %>% mutate(index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) %>% + select(site, index, r.squared, mae, rmse) %>% mutate(index = case_when( index == "evi_mean" ~ "EVI", index == "ndvi_mean" ~ "NDVI", @@ -624,14 +645,15 @@ all_sites_all_indices <- lm(gpp_dt_vut_ref ~ evi_mean + # summary(all_sites_all_indices) metrics <- augment(all_sites_all_indices) %>% - select(gpp_dt_vut_ref, .resid) %>% - mutate(rmse = (sqrt(mean((.resid)^2)))) + select(gpp_dt_vut_ref, .resid) rmse <- sqrt(mean((metrics$.resid)^2)) +mae <- mean(abs(metrics$.resid)) all_sites_all_vis_glance_daily <- glance(all_sites_all_indices) %>% mutate(rmse = rmse, + mae = mae, site = "All", index = "All") %>% - select(site, index, r.squared, adj.r.squared, rmse) + select(site, index, r.squared, mae, rmse) From b0b8fd16b0684b5a205efe143802f6b7fd4638dd Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Mon, 21 Aug 2023 12:45:17 -0600 Subject: [PATCH 02/11] Ref #60 map for the bar plot monthly --- test_plot_residuals.R | 112 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 test_plot_residuals.R diff --git a/test_plot_residuals.R b/test_plot_residuals.R new file mode 100644 index 0000000..f7685a8 --- /dev/null +++ b/test_plot_residuals.R @@ -0,0 +1,112 @@ +# File to check if we can plot the residuals from each of the models + +# Object for residuals distribution: +all_sites_lm + +library(cowplot) +library(ggridges) + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + select(index, gpp_dt_vut_ref, .fitted, .resid) %>% + ggplot(aes(x = .resid, fill = index)) + + geom_density(alpha = .7) + + scale_fill_viridis_d() + + scale_y_continuous(expand = c(0, 0)) + + theme_half_open(12) + + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + select(index, gpp_dt_vut_ref, .fitted, .resid) %>% + rename("residuals" = ".resid") %>% + ggplot(aes(x = residuals, y = index, fill = index)) + + geom_density_ridges() + + geom_vline(xintercept = 0, colour = "#FF5500", + linewidth = 0.7, linetype = "dashed") + + scale_fill_viridis_d() + + theme_ridges() + + theme(legend.position = "none") + +# all_sites_lm %>% +# unnest(augmented) %>% glimpse() + +all_sites_lm %>% + # mutate(rmse = map_dbl(augmented, ~sqrt(mean((.x$.resid)^2)))) %>% + select(index, augmented) %>% + unnest(cols = c(augmented)) %>% + ggplot(aes(x = .fitted, y = gpp_dt_vut_ref, colour = index)) + + geom_point(size = 4, alpha = .8) + + geom_abline(lty = 1, color = "#E20D6A", linewidth = 2) + + scale_color_viridis_d() + + theme_ridges() + +# Barplot monthly +response_vars <- c("r.squared", "mae", "rmse") + +# Create a function to generate the plot +create_plot <- function(y_var, ylim_range) { + vis_site_glance_montly %>% + select(site, index, {{ y_var }}) %>% + bind_rows(all_sites_glance_monthly, + all_sites_all_vis_glance_monthly, + all_vis_glance_monthly) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + theme_minimal_hgrid() +} + +# Map over the response variables and create the plots +plots <- map2(response_vars, list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), create_plot) + + +# Grid the plots as should go in the chapter +plot_grid(plots[[1]], plots[[2]], plots[[3]], + nrow = 3) + + + + + + + +# Si tengo tiempo, revisar este proceso +# library(tidymodels) +# tidymodels_prefer() +# +# +# all_sites <- +# +# +# +# basic_rec <- +# recipe(Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type + +# Latitude + Longitude, data = ames_train) %>% +# step_log(Gr_Liv_Area, base = 10) %>% +# step_other(Neighborhood, threshold = 0.01) %>% +# step_dummy(all_nominal_predictors()) +# +# interaction_rec <- +# basic_rec %>% +# step_interact( ~ Gr_Liv_Area:starts_with("Bldg_Type_") ) +# +# spline_rec <- +# interaction_rec %>% +# step_ns(Latitude, Longitude, deg_free = 50) +# +# preproc <- +# list(basic = basic_rec, +# interact = interaction_rec, +# splines = spline_rec +# ) +# +# lm_models <- workflow_set(preproc, list(lm = linear_reg()), cross = FALSE) +# lm_models +# + From b30f0a9fe78c8904846c1773af6b6866023366ce Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Mon, 21 Aug 2023 13:05:19 -0600 Subject: [PATCH 03/11] Ref #69 map over monthly weekly and daily metrics --- test_plot_residuals.R | 52 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/test_plot_residuals.R b/test_plot_residuals.R index f7685a8..31ddc81 100644 --- a/test_plot_residuals.R +++ b/test_plot_residuals.R @@ -1,6 +1,6 @@ # File to check if we can plot the residuals from each of the models -# Object for residuals distribution: +# Object for residuals distribution: ------------------------------------------- all_sites_lm library(cowplot) @@ -45,7 +45,7 @@ all_sites_lm %>% scale_color_viridis_d() + theme_ridges() -# Barplot monthly +# Barplot monthly -------------------------------------------------------------- response_vars <- c("r.squared", "mae", "rmse") # Create a function to generate the plot @@ -63,17 +63,59 @@ create_plot <- function(y_var, ylim_range) { } # Map over the response variables and create the plots -plots <- map2(response_vars, list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), create_plot) - +plots <- map2(response_vars, + list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), + create_plot) # Grid the plots as should go in the chapter -plot_grid(plots[[1]], plots[[2]], plots[[3]], +monthly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], nrow = 3) +# Weekly +create_plot <- function(y_var, ylim_range) { + vis_site_glance_weekly %>% + select(site, index, {{ y_var }}) %>% + bind_rows(all_sites_glance_weekly, + all_sites_all_vis_glance_weekly, + all_vis_glance_weekly) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + theme_minimal_hgrid() +} + +# Map over the response variables and create the plots +plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + create_plot) +# Grid the plots as should go in the chapter +weekly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], + nrow = 3) +# Weekly +create_plot <- function(y_var, ylim_range) { + vis_site_glance_weekly %>% + select(site, index, {{ y_var }}) %>% + bind_rows(all_sites_glance_daily, + all_sites_all_vis_glance_daily, + all_vis_glance_daily) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + theme_minimal_hgrid() +} +# Map over the response variables and create the plots +plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + create_plot) +# Grid the plots as should go in the chapter +daily_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], + nrow = 3) # Si tengo tiempo, revisar este proceso From fa50b470d36f3e286321c5e9cfb02df568d9efde Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Tue, 22 Aug 2023 11:26:37 -0600 Subject: [PATCH 04/11] Ref #69 function to map over the metrics plots --- chapter_2_lm.qmd | 210 ++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 114 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index 7a5ffc6..c9fccd1 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -656,6 +656,48 @@ The complete metrics results for each of the linear models are in #| warning: false source("scripts/lm_preparation.R") +create_metrics_table <- function(data) { + data %>% + gt() %>% + tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% + tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% + tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% + tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% + tab_spanner(label = "All", columns = ends_with("All")) %>% + cols_label( + .list = list( + "site" = "Site", + "r.squared_EVI" = "R2", + "mae_EVI" = "MAE", + "rmse_EVI" = "RMSE", + "r.squared_NDVI" = "R2", + "mae_NDVI" = "MAE", + "rmse_NDVI" = "RMSE", + "r.squared_NIRv" = "R2", + "mae_NIRv" = "MAE", + "rmse_NIRv" = "RMSE", + "r.squared_CCI" = "R2", + "mae_CCI" = "MAE", + "rmse_CCI" = "RMSE", + "r.squared_All" = "R2", + "mae_All" = "MAE", + "rmse_All" = "RMSE" + ) + ) %>% + cols_align( + align = "center", + columns = 2:16 + ) %>% + fmt_number( + columns = 2:16, + decimals = 2) %>% + tab_options( + row_group.background.color = "#E9E0E1", + row_group.font.weight = "bold" + ) %>% + cols_width(everything() ~ px(50)) +} + # Monthly table vis_site_glance_montly %>% select(site, index, r.squared, mae, rmse) %>% @@ -664,44 +706,8 @@ vis_site_glance_montly %>% all_vis_glance_monthly) %>% pivot_wider(names_from = index, values_from = c(r.squared, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "mae_EVI" = "MAE", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "mae_NDVI" = "MAE", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "mae_NIRv" = "MAE", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "mae_CCI" = "MAE", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() + # Weekly table vis_site_glance_weekly %>% @@ -711,44 +717,7 @@ vis_site_glance_weekly %>% all_vis_glance_weekly) %>% pivot_wider(names_from = index, values_from = c(r.squared, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "mae_EVI" = "MAE", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "mae_NDVI" = "MAE", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "mae_NIRv" = "MAE", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "mae_CCI" = "MAE", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() # Daily table vis_site_glance_daily %>% @@ -758,48 +727,61 @@ vis_site_glance_daily %>% all_vis_glance_daily) %>% pivot_wider(names_from = index, values_from = c(r.squared, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = ends_with("NDVI")) %>% - tab_spanner(label = "EVI", columns = ends_with("EVI")) %>% - tab_spanner(label = "NIRv", columns = ends_with("NIRv")) %>% - tab_spanner(label = "CCI", columns = ends_with("CCI")) %>% - tab_spanner(label = "All", columns = ends_with("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "r.squared_EVI" = "R2", - "mae_EVI" = "MAE", - "rmse_EVI" = "RMSE", - "r.squared_NDVI" = "R2", - "mae_NDVI" = "MAE", - "rmse_NDVI" = "RMSE", - "r.squared_NIRv" = "R2", - "mae_NIRv" = "MAE", - "rmse_NIRv" = "RMSE", - "r.squared_CCI" = "R2", - "mae_CCI" = "MAE", - "rmse_CCI" = "RMSE", - "r.squared_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() ``` \endgroup + +```{r lm_barplot_metrics} +#| label: fig-lm_metrics +#| fig-cap: "Summary of Linear models for GPP estimation using the vegetation indices on a monthly (a), weekly (b), and daily (c) basis." +#| echo: false +#| message: false +#| warning: false +library(cowplot) + +# Create a function to generate the plot +create_metrics_plot <- function(timescale, y_var, ylim_range) { + get(paste0("vis_site_glance_", timescale)) %>% + select(site, index, {{ y_var }}) %>% + bind_rows(get(paste0("all_sites_glance_", timescale)), + get(paste0("all_sites_all_vis_glance_", timescale)), + get(paste0("all_vis_glance_", timescale))) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + theme_minimal_hgrid() +} + +# Response variables are going to be the same: +response_vars <- c("r.squared", "mae", "rmse") + +# Monthly plots +monthly_metrics_plots <- map2(response_vars, + list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), + ~ create_metrics_plot("monthly", .x, .y)) + +# Weekly plots +weekly_metrics_plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + ~ create_metrics_plot("weekly", .x, .y)) + +# Daily plots +daily_metrics_plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + ~ create_metrics_plot("daily", .x, .y)) + +# Grid the plots as should go in the chapter +plot_grid(monthly_metrics_plots[[1]], weekly_metrics_plots[[1]], daily_metrics_plots[[1]], + monthly_metrics_plots[[2]], weekly_metrics_plots[[2]], daily_metrics_plots[[2]], + monthly_metrics_plots[[3]], weekly_metrics_plots[[3]], daily_metrics_plots[[3]], + nrow = 3, + ncol = 3) +``` + + ### Analysis of GPP-Vegetation Index Relationships Using GAM Models This variance implied that a linear From 150cfbde1554ab2fc0f8801459cd88248fb69ebc Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Tue, 22 Aug 2023 11:36:12 -0600 Subject: [PATCH 05/11] Ref #69 include plot in the chapter 2 --- chapter_2_lm.qmd | 25 ++++++++++++++++++++----- scripts/lm_preparation.R | 2 +- test_plot_residuals.R | 11 ++++++++--- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index c9fccd1..3d66692 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -699,7 +699,7 @@ create_metrics_table <- function(data) { } # Monthly table -vis_site_glance_montly %>% +vis_site_glance_monthly %>% select(site, index, r.squared, mae, rmse) %>% bind_rows(all_sites_glance_monthly, all_sites_all_vis_glance_monthly, @@ -774,11 +774,26 @@ daily_metrics_plots <- map2(response_vars, ~ create_metrics_plot("daily", .x, .y)) # Grid the plots as should go in the chapter -plot_grid(monthly_metrics_plots[[1]], weekly_metrics_plots[[1]], daily_metrics_plots[[1]], - monthly_metrics_plots[[2]], weekly_metrics_plots[[2]], daily_metrics_plots[[2]], - monthly_metrics_plots[[3]], weekly_metrics_plots[[3]], daily_metrics_plots[[3]], +lm_metrics_plots <- plot_grid(monthly_metrics_plots[[1]] + theme(legend.position = "none"), + weekly_metrics_plots[[1]] + theme(legend.position = "none"), + daily_metrics_plots[[1]] + theme(legend.position = "none"), + monthly_metrics_plots[[2]] + theme(legend.position = "none"), + weekly_metrics_plots[[2]] + theme(legend.position = "none"), + daily_metrics_plots[[2]] + theme(legend.position = "none"), + monthly_metrics_plots[[3]] + theme(legend.position = "none"), + weekly_metrics_plots[[3]] + theme(legend.position = "none"), + daily_metrics_plots[[3]] + theme(legend.position = "none"), nrow = 3, - ncol = 3) + ncol = 3, + labels = c("A", "B", "C")) + +plot_legend <- get_legend( + monthly_metrics_plots[[1]] + + guides(color = guide_legend(nrow = 3)) +) + +gpp_trends <- plot_grid(lm_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) +rm(plot_legend) ``` diff --git a/scripts/lm_preparation.R b/scripts/lm_preparation.R index 8c22e78..6afd7fa 100644 --- a/scripts/lm_preparation.R +++ b/scripts/lm_preparation.R @@ -159,7 +159,7 @@ cci_glance_monthly <- cci_lm %>% arrange(desc(r.squared)) %>% mutate(index = "CCI") -vis_site_glance_montly <- bind_rows(evi_glance_monthly, +vis_site_glance_monthly <- bind_rows(evi_glance_monthly, ndvi_glance_monthly, nirv_glance_monthly, cci_glance_monthly) diff --git a/test_plot_residuals.R b/test_plot_residuals.R index 31ddc81..2eb16ed 100644 --- a/test_plot_residuals.R +++ b/test_plot_residuals.R @@ -63,13 +63,13 @@ create_plot <- function(y_var, ylim_range) { } # Map over the response variables and create the plots -plots <- map2(response_vars, +monthly_metrics_plots <- map2(response_vars, list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), create_plot) # Grid the plots as should go in the chapter -monthly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], - nrow = 3) +# monthly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], +# nrow = 3) # Weekly create_plot <- function(y_var, ylim_range) { @@ -117,6 +117,11 @@ plots <- map2(response_vars, daily_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], nrow = 3) +plot_grid(monthly_metrics_plot, + weekly_metrics_plot, + daily_metrics_plot, + nrow = 3, + ncol = 3) # Si tengo tiempo, revisar este proceso # library(tidymodels) From f20bebaa51d870efef6ebaaaa9bd8aba18aa6a87 Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Tue, 22 Aug 2023 11:50:29 -0600 Subject: [PATCH 06/11] Ref #69 plot not as an object --- chapter_2_lm.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index 3d66692..928df82 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -792,7 +792,7 @@ plot_legend <- get_legend( guides(color = guide_legend(nrow = 3)) ) -gpp_trends <- plot_grid(lm_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) +plot_grid(lm_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) rm(plot_legend) ``` From b8647260cae257fc8285418576423cd213e92592 Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Tue, 22 Aug 2023 13:01:57 -0600 Subject: [PATCH 07/11] Ref #69 include metrics plot for GAM --- chapter_2_lm.qmd | 93 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index 928df82..3f53d7a 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -774,18 +774,19 @@ daily_metrics_plots <- map2(response_vars, ~ create_metrics_plot("daily", .x, .y)) # Grid the plots as should go in the chapter -lm_metrics_plots <- plot_grid(monthly_metrics_plots[[1]] + theme(legend.position = "none"), - weekly_metrics_plots[[1]] + theme(legend.position = "none"), - daily_metrics_plots[[1]] + theme(legend.position = "none"), - monthly_metrics_plots[[2]] + theme(legend.position = "none"), - weekly_metrics_plots[[2]] + theme(legend.position = "none"), - daily_metrics_plots[[2]] + theme(legend.position = "none"), - monthly_metrics_plots[[3]] + theme(legend.position = "none"), - weekly_metrics_plots[[3]] + theme(legend.position = "none"), - daily_metrics_plots[[3]] + theme(legend.position = "none"), - nrow = 3, - ncol = 3, - labels = c("A", "B", "C")) +lm_metrics_plots <- plot_grid( + monthly_metrics_plots[[1]] + theme(legend.position = "none"), + weekly_metrics_plots[[1]] + theme(legend.position = "none"), + daily_metrics_plots[[1]] + theme(legend.position = "none"), + monthly_metrics_plots[[2]] + theme(legend.position = "none"), + weekly_metrics_plots[[2]] + theme(legend.position = "none"), + daily_metrics_plots[[2]] + theme(legend.position = "none"), + monthly_metrics_plots[[3]] + theme(legend.position = "none"), + weekly_metrics_plots[[3]] + theme(legend.position = "none"), + daily_metrics_plots[[3]] + theme(legend.position = "none"), + nrow = 3, + ncol = 3, + labels = c("A", "B", "C")) plot_legend <- get_legend( monthly_metrics_plots[[1]] + @@ -988,6 +989,74 @@ all_sites_gam_monthly %>% ``` \endgroup +```{r lm_barplot_metrics} +#| label: fig-gam_metrics +#| fig-cap: "Summary of GAM models for GPP estimation using the vegetation indices on a monthly (a), weekly (b), and daily (c) basis." +#| echo: false +#| message: false +#| warning: false +library(cowplot) + +# Create a function to generate the plot +create_metrics_plot <- function(timescale, y_var, ylim_range) { + get(paste0("all_sites_gam_", timescale)) %>% + # select(site, index, {{ y_var }}) %>% + bind_rows(get(paste0("vis_sites_gam_", timescale)), + get(paste0("all_sites_all_vis_gam_", timescale)), + get(paste0("all_vis_gam_", timescale))) %>% + ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + + geom_col(position = "dodge") + + coord_cartesian(ylim = ylim_range) + + scale_fill_viridis_d() + + theme_minimal_hgrid() +} + +# Response variables are going to be the same: +response_vars <- c("rsq", "mae", "rmse") + +# Monthly plots +monthly_metrics_plots <- map2(response_vars, + list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), + ~ create_metrics_plot("monthly", .x, .y)) + +# Weekly plots +weekly_metrics_plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + ~ create_metrics_plot("weekly", .x, .y)) + +# Daily plots +daily_metrics_plots <- map2(response_vars, + list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + ~ create_metrics_plot("daily", .x, .y)) + +# Grid the plots as should go in the chapter +gam_metrics_plots <- plot_grid( + monthly_metrics_plots[[1]] + theme(legend.position = "none"), + weekly_metrics_plots[[1]] + theme(legend.position = "none"), + daily_metrics_plots[[1]] + theme(legend.position = "none"), + monthly_metrics_plots[[2]] + theme(legend.position = "none"), + weekly_metrics_plots[[2]] + theme(legend.position = "none"), + daily_metrics_plots[[2]] + theme(legend.position = "none"), + monthly_metrics_plots[[3]] + theme(legend.position = "none"), + weekly_metrics_plots[[3]] + theme(legend.position = "none"), + daily_metrics_plots[[3]] + theme(legend.position = "none"), + nrow = 3, + ncol = 3, + labels = c("A", "B", "C")) + +plot_legend <- get_legend( + monthly_metrics_plots[[1]] + + guides(color = guide_legend(nrow = 3)) +) + +plot_grid(gam_metrics_plots, plot_legend, ncol = 2, rel_widths = c(1, .1)) +rm(plot_legend) +``` + + + + + \newpage ## Discussion From 0b8b7892c8933649f94d8c60f9ac091cd7c6d048 Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Tue, 22 Aug 2023 15:51:51 -0600 Subject: [PATCH 08/11] Ref #79 function for gam metrics table --- chapter_2_lm.qmd | 192 +++++++++++++++-------------------------------- 1 file changed, 59 insertions(+), 133 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index 3f53d7a..b487a07 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -846,52 +846,57 @@ provide a more comprehensive explanation for the relationship. This adaptabilit #| warning: false source("scripts/gam_preparation.R") -# Daily table -all_sites_gam_daily %>% - bind_rows(vis_sites_gam_daily, - all_sites_all_vis_gam_daily, - all_vis_gam_daily, - ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" +create_metrics_table <- function(data) { + data %>% + pivot_wider(names_from = index, + values_from = c(rsq, mae, rmse)) %>% + gt() %>% + tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% + tab_spanner(label = "EVI", columns = contains("EVI")) %>% + tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% + tab_spanner(label = "CCI", columns = contains("CCI")) %>% + tab_spanner(label = "All", columns = contains("All")) %>% + cols_label( + .list = list( + "site" = "Site", + "rsq_evi_mean" = "R2", + "mae_evi_mean" = "MAE", + "rmse_evi_mean" = "RMSE", + "rsq_ndvi_mean" = "R2", + "mae_ndvi_mean" = "MAE", + "rmse_ndvi_mean" = "RMSE", + "rsq_nirv_mean" = "R2", + "mae_nirv_mean" = "MAE", + "rmse_nirv_mean" = "RMSE", + "rsq_cci_mean" = "R2", + "mae_cci_mean" = "MAE", + "rmse_cci_mean" = "RMSE", + "rsq_All" = "R2", + "mae_All" = "MAE", + "rmse_All" = "RMSE" + ) + ) %>% + cols_align( + align = "center", + columns = 2:16 + ) %>% + fmt_number( + columns = 2:16, + decimals = 2) %>% + tab_options( + row_group.background.color = "#E9E0E1", + row_group.font.weight = "bold" + ) %>% + cols_width(everything() ~ px(50)) +} + +# Monthly table +all_sites_gam_monthly %>% + bind_rows(vis_sites_gam_monthly, + all_sites_all_vis_gam_monthly, + all_vis_gam_monthly, ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() # Weekly table all_sites_gam_weekly %>% @@ -899,103 +904,24 @@ all_sites_gam_weekly %>% all_sites_all_vis_gam_weekly, all_vis_gam_weekly, ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" - ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() -# Monthly table -all_sites_gam_monthly %>% - bind_rows(vis_sites_gam_monthly, - all_sites_all_vis_gam_monthly, - all_vis_gam_monthly, - ) %>% - pivot_wider(names_from = index, - values_from = c(rsq, mae, rmse)) %>% - gt() %>% - tab_spanner(label = md("NDVI"), columns = contains("NDVI")) %>% - tab_spanner(label = "EVI", columns = contains("EVI")) %>% - tab_spanner(label = "NIRv", columns = contains("NIRv")) %>% - tab_spanner(label = "CCI", columns = contains("CCI")) %>% - tab_spanner(label = "All", columns = contains("All")) %>% - cols_label( - .list = list( - "site" = "Site", - "rsq_evi_mean" = "R2", - "mae_evi_mean" = "MAE", - "rmse_evi_mean" = "RMSE", - "rsq_ndvi_mean" = "R2", - "mae_ndvi_mean" = "MAE", - "rmse_ndvi_mean" = "RMSE", - "rsq_nirv_mean" = "R2", - "mae_nirv_mean" = "MAE", - "rmse_nirv_mean" = "RMSE", - "rsq_cci_mean" = "R2", - "mae_cci_mean" = "MAE", - "rmse_cci_mean" = "RMSE", - "rsq_All" = "R2", - "mae_All" = "MAE", - "rmse_All" = "RMSE" - ) - ) %>% - cols_align( - align = "center", - columns = 2:16 - ) %>% - fmt_number( - columns = 2:16, - decimals = 2) %>% - tab_options( - row_group.background.color = "#E9E0E1", - row_group.font.weight = "bold" +# Daily table +all_sites_gam_daily %>% + bind_rows(vis_sites_gam_daily, + all_sites_all_vis_gam_daily, + all_vis_gam_daily, ) %>% - cols_width(everything() ~ px(50)) + create_metrics_table() ``` \endgroup -```{r lm_barplot_metrics} +```{r gam_barplot_metrics} #| label: fig-gam_metrics -#| fig-cap: "Summary of GAM models for GPP estimation using the vegetation indices on a monthly (a), weekly (b), and daily (c) basis." +#| fig-cap: "Summary of GAM models for GPP estimation using the vegetation indices. Column A represents the metrics for the monthly models, B the weekly, and C the daily metrics." #| echo: false #| message: false #| warning: false -library(cowplot) # Create a function to generate the plot create_metrics_plot <- function(timescale, y_var, ylim_range) { From 40ee2be7863593e2a17a2dac4b7971912f211d0e Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Wed, 23 Aug 2023 14:45:32 -0600 Subject: [PATCH 09/11] Ref #77 heigth and witdh of metrics plots fixed --- chapter_2_lm.qmd | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/chapter_2_lm.qmd b/chapter_2_lm.qmd index b487a07..7feda7f 100644 --- a/chapter_2_lm.qmd +++ b/chapter_2_lm.qmd @@ -736,6 +736,8 @@ vis_site_glance_daily %>% ```{r lm_barplot_metrics} #| label: fig-lm_metrics #| fig-cap: "Summary of Linear models for GPP estimation using the vegetation indices on a monthly (a), weekly (b), and daily (c) basis." +#| fig-width: 7 +#| fig-height: 9 #| echo: false #| message: false #| warning: false @@ -752,7 +754,9 @@ create_metrics_plot <- function(timescale, y_var, ylim_range) { geom_col(position = "dodge") + coord_cartesian(ylim = ylim_range) + scale_fill_viridis_d() + - theme_minimal_hgrid() + labs(x = "") + + theme_minimal_hgrid(font_size = 12) + + theme(axis.text.x = element_text(angle = 45, h = 1)) } # Response variables are going to be the same: @@ -760,17 +764,17 @@ response_vars <- c("r.squared", "mae", "rmse") # Monthly plots monthly_metrics_plots <- map2(response_vars, - list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), ~ create_metrics_plot("monthly", .x, .y)) # Weekly plots weekly_metrics_plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), ~ create_metrics_plot("weekly", .x, .y)) # Daily plots daily_metrics_plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + list(c(0.3, 1), c(0.5, 4), c(0.5, 5)), ~ create_metrics_plot("daily", .x, .y)) # Grid the plots as should go in the chapter @@ -919,6 +923,8 @@ all_sites_gam_daily %>% ```{r gam_barplot_metrics} #| label: fig-gam_metrics #| fig-cap: "Summary of GAM models for GPP estimation using the vegetation indices. Column A represents the metrics for the monthly models, B the weekly, and C the daily metrics." +#| fig-width: 7 +#| fig-height: 9 #| echo: false #| message: false #| warning: false @@ -928,13 +934,15 @@ create_metrics_plot <- function(timescale, y_var, ylim_range) { get(paste0("all_sites_gam_", timescale)) %>% # select(site, index, {{ y_var }}) %>% bind_rows(get(paste0("vis_sites_gam_", timescale)), - get(paste0("all_sites_all_vis_gam_", timescale)), + get(paste0("all_sites_all_vis_gam_", timescale)), get(paste0("all_vis_gam_", timescale))) %>% ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + geom_col(position = "dodge") + coord_cartesian(ylim = ylim_range) + scale_fill_viridis_d() + - theme_minimal_hgrid() + labs(x = "") + + theme_minimal_hgrid(font_size = 12) + + theme(axis.text.x = element_text(angle = 45, h = 1)) } # Response variables are going to be the same: @@ -942,17 +950,17 @@ response_vars <- c("rsq", "mae", "rmse") # Monthly plots monthly_metrics_plots <- map2(response_vars, - list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), ~ create_metrics_plot("monthly", .x, .y)) # Weekly plots weekly_metrics_plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), ~ create_metrics_plot("weekly", .x, .y)) # Daily plots daily_metrics_plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), + list(c(0.3, 1), c(0.5, 4), c(0.8, 5)), ~ create_metrics_plot("daily", .x, .y)) # Grid the plots as should go in the chapter From 6a8196c8fd6b3a782ad4c611b59c5cd6d8e3c383 Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Wed, 23 Aug 2023 16:03:36 -0600 Subject: [PATCH 10/11] Ref #79 review and fix gam results --- scripts/gam_preparation.R | 7 ++-- test_plot_residuals.R | 76 --------------------------------------- 2 files changed, 3 insertions(+), 80 deletions(-) diff --git a/scripts/gam_preparation.R b/scripts/gam_preparation.R index a42896a..25e2c61 100644 --- a/scripts/gam_preparation.R +++ b/scripts/gam_preparation.R @@ -17,7 +17,6 @@ library(tidymodels) library(broom) library(usemodels) library(vip) -library(h2o) library(mgcv) source("scripts/models_data_preparation.R") @@ -141,7 +140,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = daily_gam, + data = df, method = 'REML') } @@ -252,7 +251,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = weekly_gam, + data = df, method = 'REML') } @@ -363,7 +362,7 @@ mod_fun <- function(df) { s(evi_mean) + s(nirv_mean) + s(cci_mean), - data = weekly_gam, + data = df, method = 'REML') } diff --git a/test_plot_residuals.R b/test_plot_residuals.R index 2eb16ed..8f538c4 100644 --- a/test_plot_residuals.R +++ b/test_plot_residuals.R @@ -45,83 +45,7 @@ all_sites_lm %>% scale_color_viridis_d() + theme_ridges() -# Barplot monthly -------------------------------------------------------------- -response_vars <- c("r.squared", "mae", "rmse") -# Create a function to generate the plot -create_plot <- function(y_var, ylim_range) { - vis_site_glance_montly %>% - select(site, index, {{ y_var }}) %>% - bind_rows(all_sites_glance_monthly, - all_sites_all_vis_glance_monthly, - all_vis_glance_monthly) %>% - ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + - geom_col(position = "dodge") + - coord_cartesian(ylim = ylim_range) + - scale_fill_viridis_d() + - theme_minimal_hgrid() -} - -# Map over the response variables and create the plots -monthly_metrics_plots <- map2(response_vars, - list(c(0.5, 1), c(0.5, 2.5), c(0.8, 3)), - create_plot) - -# Grid the plots as should go in the chapter -# monthly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], -# nrow = 3) - -# Weekly -create_plot <- function(y_var, ylim_range) { - vis_site_glance_weekly %>% - select(site, index, {{ y_var }}) %>% - bind_rows(all_sites_glance_weekly, - all_sites_all_vis_glance_weekly, - all_vis_glance_weekly) %>% - ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + - geom_col(position = "dodge") + - coord_cartesian(ylim = ylim_range) + - scale_fill_viridis_d() + - theme_minimal_hgrid() -} - -# Map over the response variables and create the plots -plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), - create_plot) - -# Grid the plots as should go in the chapter -weekly_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], - nrow = 3) - -# Weekly -create_plot <- function(y_var, ylim_range) { - vis_site_glance_weekly %>% - select(site, index, {{ y_var }}) %>% - bind_rows(all_sites_glance_daily, - all_sites_all_vis_glance_daily, - all_vis_glance_daily) %>% - ggplot(aes(x = site, y = .data[[y_var]], fill = index)) + - geom_col(position = "dodge") + - coord_cartesian(ylim = ylim_range) + - scale_fill_viridis_d() + - theme_minimal_hgrid() -} - -# Map over the response variables and create the plots -plots <- map2(response_vars, - list(c(0.3, 0.9), c(0.8, 3.2), c(1.4, 3.6)), - create_plot) - -# Grid the plots as should go in the chapter -daily_metrics_plot <- plot_grid(plots[[1]], plots[[2]], plots[[3]], - nrow = 3) - -plot_grid(monthly_metrics_plot, - weekly_metrics_plot, - daily_metrics_plot, - nrow = 3, - ncol = 3) # Si tengo tiempo, revisar este proceso # library(tidymodels) From 5af0081b5ad7f0847810393fb98301c6ac5212fd Mon Sep 17 00:00:00 2001 From: ronnyhdez Date: Wed, 23 Aug 2023 16:33:20 -0600 Subject: [PATCH 11/11] Ref #80 fix all vis models for GAM --- scripts/gam_preparation.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/gam_preparation.R b/scripts/gam_preparation.R index 25e2c61..99470c5 100644 --- a/scripts/gam_preparation.R +++ b/scripts/gam_preparation.R @@ -370,6 +370,8 @@ group_site <- monthly_gam %>% nest(data = c(-site)) models <- group_site %>% + # Michigan & Bartlett have few data points. + filter(site == "Borden") %>% mutate(model = map(data, mod_fun), index = "All") @@ -378,7 +380,18 @@ all_vis_gam_monthly <- models %>% rsq = map_dbl(model, rsq_fun), rmse = map_dbl(model, rmse_fun), mae = map_dbl(model, mae_fun)) %>% - arrange(desc(rsq)) + arrange(desc(rsq)) %>% + # Insert NA values for Bartlett & Michigan which does not have + # enough obs. for this kind of model + bind_rows( + tibble( + index = rep("All", 2), + site = c("Bartlett", "Michigan"), + rsq = rep(NA, 2), + rmse = rep(NA, 2), + mae = rep(NA, 2) + ) + ) # GAM model for all sites and all indices (covariates) [H | All VIs + site] single_vis_monthly <- gam(gpp_dt_vut_ref ~ ndvi_mean +