From 201e9790ebeeb8ca4ca78dc97d0eac8c043c1355 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Tue, 7 May 2024 19:11:47 +0100 Subject: [PATCH] report.compare.loo: minor edits (#427) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * edits * Adding reference for all the things meme #428 * fix errors, wordlist, styler, lints --------- Co-authored-by: Daniel Co-authored-by: Rémi Thériault <13123390+rempsyc@users.noreply.github.com> --- R/report.compare.loo.R | 128 +++++++++++++++++++++++--------------- README.Rmd | 2 +- inst/WORDLIST | 1 + man/report.compare.loo.Rd | 22 +++++-- 4 files changed, 95 insertions(+), 58 deletions(-) diff --git a/R/report.compare.loo.R b/R/report.compare.loo.R index 7225dae4..f720b66f 100644 --- a/R/report.compare.loo.R +++ b/R/report.compare.loo.R @@ -3,8 +3,8 @@ #' Automatically report the results of Bayesian model comparison using the `loo` package. #' #' @param x An object of class [brms::loo_compare]. -#' @param index type if index to report - expected log pointwise predictive -#' density (ELPD) or information criteria (IC). +#' @param include_IC Whether to include the information criteria (IC). +#' @param include_ENP Whether to include the effective number of parameters (ENP). #' @param ... Additional arguments (not used for now). #' #' @examplesIf require("brms", quietly = TRUE) @@ -13,12 +13,17 @@ #' #' m1 <- brms::brm(mpg ~ qsec, data = mtcars) #' m2 <- brms::brm(mpg ~ qsec + drat, data = mtcars) +#' m3 <- brms::brm(mpg ~ qsec + drat + wt, data = mtcars) #' -#' x <- brms::loo_compare(brms::add_criterion(m1, "loo"), +#' x <- brms::loo_compare( +#' brms::add_criterion(m1, "loo"), #' brms::add_criterion(m2, "loo"), -#' model_names = c("m1", "m2") +#' brms::add_criterion(m3, "loo"), +#' model_names = c("m1", "m2", "m3") #' ) #' report(x) +#' report(x, include_IC = FALSE) +#' report(x, include_ENP = TRUE) #' } #' #' @details @@ -26,11 +31,15 @@ #' absolute value of elpd_diff) is less than 4 (Sivula, Magnusson and Vehtari, 2020). #' If superior to 4, then one can use the SE to obtain a standardized difference #' (Z-diff) and interpret it as such, assuming that the difference is normally -#' distributed. +#' distributed. The corresponding p-value is then calculated as `2 * pnorm(-abs(Z-diff))`. +#' However, note that if the raw ELPD difference is small (less than 4), it doesn't +#' make much sense to rely on its standardized value: it is not very useful to +#' conclude that a model is much better than another if both models make very +#' similar predictions. #' #' @return Objects of class [report_text()]. #' @export -report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) { +report.compare.loo <- function(x, include_IC = TRUE, include_ENP = FALSE, ...) { # nolint start # https://stats.stackexchange.com/questions/608881/how-to-interpret-elpd-diff-of-bayesian-loo-estimate-in-bayesian-logistic-regress # nolint end @@ -40,72 +49,89 @@ report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) { # The difference in expected log predictive density (elpd) between each model # and the best model as well as the standard error of this difference (assuming # the difference is approximately normal). - index <- match.arg(index) - x <- as.data.frame(x) + # The values in the first row are 0s because the models are ordered from best to worst according to their elpd. + x <- as.data.frame(x) modnames <- rownames(x) elpd_diff <- x[["elpd_diff"]] + se_elpd_diff <- x[["se_diff"]] ic_diff <- -2 * elpd_diff - z_elpd_diff <- elpd_diff / x[["se_diff"]] + z_elpd_diff <- elpd_diff / se_elpd_diff + p_elpd_diff <- 2 * stats::pnorm(-abs(z_elpd_diff)) z_ic_diff <- -z_elpd_diff if ("looic" %in% colnames(x)) { - type <- "LOO" - ENP <- x[["p_loo"]] + elpd <- x[["elpd_loo"]] + enp <- x[["p_loo"]] + index_label <- "ELPD-LOO" + ic <- x[["looic"]] + index_ic <- "LOOIC" } else { - type <- "WAIC" - ENP <- x[["p_waic"]] + elpd <- x[["elpd_waic"]] + enp <- x[["p_waic"]] + index_label <- "ELPD-WAIC" + ic <- x[["waic"]] + index_ic <- "WAIC" } - if (index == "ELPD") { - index_label <- sprintf("Expected Log Predictive Density (ELPD-%s)", type) - } else if (type == "LOO") { - index_label <- "Leave-One-Out CV Information Criterion (LOOIC)" - } else { - index_label <- "Widely Applicable Information Criterion (WAIC)" - } + # TODO: The above indices-computation and name-matching should be implemented + # in a parameters.compare.loo() function which would be run here. - out_text <- sprintf( - paste( - "The difference in predictive accuracy, as index by %s, suggests that '%s' ", - "is the best model (effective number of parameters (ENP) = %.2f), followed by" + # Starting text ----- + text1 <- sprintf( + paste0( + "The difference in predictive accuracy, as indexed by Expected Log ", + "Predictive Density (%s), suggests that '%s' is the best model (" ), - index_label, modnames[1], ENP[1] + index_label, modnames[1] ) - - if (index == "ELPD") { - other_texts <- sprintf( - "'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)", - modnames[-1], - elpd_diff[-1], - ENP[-1], - z_elpd_diff[-1] - ) + if (all(c(include_IC, include_ENP))) { + if (include_IC) { + text1 <- sprintf(paste0(text1, "%s = %.2f"), index_ic, ic[1]) + } + if (include_ENP) { + if (include_IC) { + text1 <- sprintf(paste0(text1, ", ENP = %.2f)"), enp[1]) + } else { + text1 <- sprintf(paste0(text1, "ENP = %.2f)"), enp[1]) + } + } else { + text1 <- paste0(text1, ")") + } } else { - other_texts <- sprintf( - "'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)", - modnames[-1], - ic_diff[-1], - ENP[-1], - z_ic_diff[-1] - ) + text1 <- sprintf(paste0(text1, "ELPD = %.2f)"), elpd[1]) } - sep <- "." - nothermods <- length(other_texts) - if (nothermods > 1L) { - if (nothermods == 2L) { - sep <- c(" and ", sep) + # Other models --- + text_models <- sprintf( + "'%s' (diff-ELPD = %.2f +- %.2f, %s", + modnames[-1], + elpd_diff[-1], + se_elpd_diff[-1], + insight::format_p(p_elpd_diff[-1]) + ) + + if (all(c(include_IC, include_ENP))) { + if (include_IC) { + text_models <- sprintf(paste0(text_models, ", %s = %.2f"), index_ic, ic[-1]) + } + if (include_ENP) { + if (include_IC) { + text_models <- sprintf(paste0(text_models, ", ENP = %.2f)"), enp[-1]) + } else { + text_models <- sprintf(paste0(text_models, "ENP = %.2f)"), enp[-1]) + } } else { - sep <- c(rep(", ", length = nothermods - 2), ", and ", sep) + text_models <- sprintf(paste0(text_models, ")")) } + } else { + text_models <- paste0(text_models, ")") } - other_texts <- paste0(other_texts, sep, collapse = "") - out_text <- paste(out_text, other_texts, collapse = "") - class(text) <- c("report_text", class(text)) - out_text + text1 <- paste0(text1, ", followed by ", datawizard::text_concatenate(text_models)) + class(text1) <- c("report_text", class(text1)) + text1 } diff --git a/README.Rmd b/README.Rmd index c5735393..7bca5c21 100644 --- a/README.Rmd +++ b/README.Rmd @@ -79,7 +79,7 @@ The package documentation can be found [**here**](https://easystats.github.io/re ## Report all the things - +All the things meme by Allie Brosh ### General Workflow diff --git a/inst/WORDLIST b/inst/WORDLIST index 6f3ef10d..59204c57 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -6,6 +6,7 @@ CMD CSL Dom ELPD +ENP ESS Gabry Hotfix diff --git a/man/report.compare.loo.Rd b/man/report.compare.loo.Rd index 47c2ff3b..abb3d4d0 100644 --- a/man/report.compare.loo.Rd +++ b/man/report.compare.loo.Rd @@ -4,13 +4,14 @@ \alias{report.compare.loo} \title{Reporting Bayesian Model Comparison} \usage{ -\method{report}{compare.loo}(x, index = c("ELPD", "IC"), ...) +\method{report}{compare.loo}(x, include_IC = TRUE, include_ENP = FALSE, ...) } \arguments{ \item{x}{An object of class \link[brms:loo_compare.brmsfit]{brms::loo_compare}.} -\item{index}{type if index to report - expected log pointwise predictive -density (ELPD) or information criteria (IC).} +\item{include_IC}{Whether to include the information criteria (IC).} + +\item{include_ENP}{Whether to include the effective number of parameters (ENP).} \item{...}{Additional arguments (not used for now).} } @@ -25,7 +26,11 @@ The rule of thumb is that the models are "very similar" if |elpd_diff| (the absolute value of elpd_diff) is less than 4 (Sivula, Magnusson and Vehtari, 2020). If superior to 4, then one can use the SE to obtain a standardized difference (Z-diff) and interpret it as such, assuming that the difference is normally -distributed. +distributed. The corresponding p-value is then calculated as \code{2 * pnorm(-abs(Z-diff))}. +However, note that if the raw ELPD difference is small (less than 4), it doesn't +make much sense to rely on its standardized value: it is not very useful to +conclude that a model is much better than another if both models make very +similar predictions. } \examples{ \dontshow{if (require("brms", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -34,12 +39,17 @@ library(brms) m1 <- brms::brm(mpg ~ qsec, data = mtcars) m2 <- brms::brm(mpg ~ qsec + drat, data = mtcars) +m3 <- brms::brm(mpg ~ qsec + drat + wt, data = mtcars) -x <- brms::loo_compare(brms::add_criterion(m1, "loo"), +x <- brms::loo_compare( + brms::add_criterion(m1, "loo"), brms::add_criterion(m2, "loo"), - model_names = c("m1", "m2") + brms::add_criterion(m3, "loo"), + model_names = c("m1", "m2", "m3") ) report(x) +report(x, include_IC = FALSE) +report(x, include_ENP = TRUE) } \dontshow{\}) # examplesIf} }