diff --git a/R/report.compare.loo.R b/R/report.compare.loo.R
index 7225dae4..f720b66f 100644
--- a/R/report.compare.loo.R
+++ b/R/report.compare.loo.R
@@ -3,8 +3,8 @@
#' Automatically report the results of Bayesian model comparison using the `loo` package.
#'
#' @param x An object of class [brms::loo_compare].
-#' @param index type if index to report - expected log pointwise predictive
-#' density (ELPD) or information criteria (IC).
+#' @param include_IC Whether to include the information criteria (IC).
+#' @param include_ENP Whether to include the effective number of parameters (ENP).
#' @param ... Additional arguments (not used for now).
#'
#' @examplesIf require("brms", quietly = TRUE)
@@ -13,12 +13,17 @@
#'
#' m1 <- brms::brm(mpg ~ qsec, data = mtcars)
#' m2 <- brms::brm(mpg ~ qsec + drat, data = mtcars)
+#' m3 <- brms::brm(mpg ~ qsec + drat + wt, data = mtcars)
#'
-#' x <- brms::loo_compare(brms::add_criterion(m1, "loo"),
+#' x <- brms::loo_compare(
+#' brms::add_criterion(m1, "loo"),
#' brms::add_criterion(m2, "loo"),
-#' model_names = c("m1", "m2")
+#' brms::add_criterion(m3, "loo"),
+#' model_names = c("m1", "m2", "m3")
#' )
#' report(x)
+#' report(x, include_IC = FALSE)
+#' report(x, include_ENP = TRUE)
#' }
#'
#' @details
@@ -26,11 +31,15 @@
#' absolute value of elpd_diff) is less than 4 (Sivula, Magnusson and Vehtari, 2020).
#' If superior to 4, then one can use the SE to obtain a standardized difference
#' (Z-diff) and interpret it as such, assuming that the difference is normally
-#' distributed.
+#' distributed. The corresponding p-value is then calculated as `2 * pnorm(-abs(Z-diff))`.
+#' However, note that if the raw ELPD difference is small (less than 4), it doesn't
+#' make much sense to rely on its standardized value: it is not very useful to
+#' conclude that a model is much better than another if both models make very
+#' similar predictions.
#'
#' @return Objects of class [report_text()].
#' @export
-report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) {
+report.compare.loo <- function(x, include_IC = TRUE, include_ENP = FALSE, ...) {
# nolint start
# https://stats.stackexchange.com/questions/608881/how-to-interpret-elpd-diff-of-bayesian-loo-estimate-in-bayesian-logistic-regress
# nolint end
@@ -40,72 +49,89 @@ report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) {
# The difference in expected log predictive density (elpd) between each model
# and the best model as well as the standard error of this difference (assuming
# the difference is approximately normal).
- index <- match.arg(index)
- x <- as.data.frame(x)
+
# The values in the first row are 0s because the models are ordered from best to worst according to their elpd.
+ x <- as.data.frame(x)
modnames <- rownames(x)
elpd_diff <- x[["elpd_diff"]]
+ se_elpd_diff <- x[["se_diff"]]
ic_diff <- -2 * elpd_diff
- z_elpd_diff <- elpd_diff / x[["se_diff"]]
+ z_elpd_diff <- elpd_diff / se_elpd_diff
+ p_elpd_diff <- 2 * stats::pnorm(-abs(z_elpd_diff))
z_ic_diff <- -z_elpd_diff
if ("looic" %in% colnames(x)) {
- type <- "LOO"
- ENP <- x[["p_loo"]]
+ elpd <- x[["elpd_loo"]]
+ enp <- x[["p_loo"]]
+ index_label <- "ELPD-LOO"
+ ic <- x[["looic"]]
+ index_ic <- "LOOIC"
} else {
- type <- "WAIC"
- ENP <- x[["p_waic"]]
+ elpd <- x[["elpd_waic"]]
+ enp <- x[["p_waic"]]
+ index_label <- "ELPD-WAIC"
+ ic <- x[["waic"]]
+ index_ic <- "WAIC"
}
- if (index == "ELPD") {
- index_label <- sprintf("Expected Log Predictive Density (ELPD-%s)", type)
- } else if (type == "LOO") {
- index_label <- "Leave-One-Out CV Information Criterion (LOOIC)"
- } else {
- index_label <- "Widely Applicable Information Criterion (WAIC)"
- }
+ # TODO: The above indices-computation and name-matching should be implemented
+ # in a parameters.compare.loo() function which would be run here.
- out_text <- sprintf(
- paste(
- "The difference in predictive accuracy, as index by %s, suggests that '%s' ",
- "is the best model (effective number of parameters (ENP) = %.2f), followed by"
+ # Starting text -----
+ text1 <- sprintf(
+ paste0(
+ "The difference in predictive accuracy, as indexed by Expected Log ",
+ "Predictive Density (%s), suggests that '%s' is the best model ("
),
- index_label, modnames[1], ENP[1]
+ index_label, modnames[1]
)
-
- if (index == "ELPD") {
- other_texts <- sprintf(
- "'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)",
- modnames[-1],
- elpd_diff[-1],
- ENP[-1],
- z_elpd_diff[-1]
- )
+  if (any(c(include_IC, include_ENP))) {
+ if (include_IC) {
+ text1 <- sprintf(paste0(text1, "%s = %.2f"), index_ic, ic[1])
+ }
+ if (include_ENP) {
+ if (include_IC) {
+ text1 <- sprintf(paste0(text1, ", ENP = %.2f)"), enp[1])
+ } else {
+ text1 <- sprintf(paste0(text1, "ENP = %.2f)"), enp[1])
+ }
+ } else {
+ text1 <- paste0(text1, ")")
+ }
} else {
- other_texts <- sprintf(
- "'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)",
- modnames[-1],
- ic_diff[-1],
- ENP[-1],
- z_ic_diff[-1]
- )
+ text1 <- sprintf(paste0(text1, "ELPD = %.2f)"), elpd[1])
}
- sep <- "."
- nothermods <- length(other_texts)
- if (nothermods > 1L) {
- if (nothermods == 2L) {
- sep <- c(" and ", sep)
+ # Other models ---
+ text_models <- sprintf(
+ "'%s' (diff-ELPD = %.2f +- %.2f, %s",
+ modnames[-1],
+ elpd_diff[-1],
+ se_elpd_diff[-1],
+ insight::format_p(p_elpd_diff[-1])
+ )
+
+  if (any(c(include_IC, include_ENP))) {
+ if (include_IC) {
+ text_models <- sprintf(paste0(text_models, ", %s = %.2f"), index_ic, ic[-1])
+ }
+ if (include_ENP) {
+ if (include_IC) {
+ text_models <- sprintf(paste0(text_models, ", ENP = %.2f)"), enp[-1])
+ } else {
+ text_models <- sprintf(paste0(text_models, "ENP = %.2f)"), enp[-1])
+ }
} else {
- sep <- c(rep(", ", length = nothermods - 2), ", and ", sep)
+      text_models <- paste0(text_models, ")")
}
+ } else {
+ text_models <- paste0(text_models, ")")
}
- other_texts <- paste0(other_texts, sep, collapse = "")
- out_text <- paste(out_text, other_texts, collapse = "")
- class(text) <- c("report_text", class(text))
- out_text
+ text1 <- paste0(text1, ", followed by ", datawizard::text_concatenate(text_models))
+ class(text1) <- c("report_text", class(text1))
+ text1
}
diff --git a/README.Rmd b/README.Rmd
index c5735393..7bca5c21 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -79,7 +79,7 @@ The package documentation can be found [**here**](https://easystats.github.io/re
## Report all the things
-
+
### General Workflow
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 6f3ef10d..59204c57 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -6,6 +6,7 @@ CMD
CSL
Dom
ELPD
+ENP
ESS
Gabry
Hotfix
diff --git a/man/report.compare.loo.Rd b/man/report.compare.loo.Rd
index 47c2ff3b..abb3d4d0 100644
--- a/man/report.compare.loo.Rd
+++ b/man/report.compare.loo.Rd
@@ -4,13 +4,14 @@
\alias{report.compare.loo}
\title{Reporting Bayesian Model Comparison}
\usage{
-\method{report}{compare.loo}(x, index = c("ELPD", "IC"), ...)
+\method{report}{compare.loo}(x, include_IC = TRUE, include_ENP = FALSE, ...)
}
\arguments{
\item{x}{An object of class \link[brms:loo_compare.brmsfit]{brms::loo_compare}.}
-\item{index}{type if index to report - expected log pointwise predictive
-density (ELPD) or information criteria (IC).}
+\item{include_IC}{Whether to include the information criteria (IC).}
+
+\item{include_ENP}{Whether to include the effective number of parameters (ENP).}
\item{...}{Additional arguments (not used for now).}
}
@@ -25,7 +26,11 @@ The rule of thumb is that the models are "very similar" if |elpd_diff| (the
absolute value of elpd_diff) is less than 4 (Sivula, Magnusson and Vehtari, 2020).
If superior to 4, then one can use the SE to obtain a standardized difference
(Z-diff) and interpret it as such, assuming that the difference is normally
-distributed.
+distributed. The corresponding p-value is then calculated as \code{2 * pnorm(-abs(Z-diff))}.
+However, note that if the raw ELPD difference is small (less than 4), it doesn't
+make much sense to rely on its standardized value: it is not very useful to
+conclude that a model is much better than another if both models make very
+similar predictions.
}
\examples{
\dontshow{if (require("brms", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
@@ -34,12 +39,17 @@ library(brms)
m1 <- brms::brm(mpg ~ qsec, data = mtcars)
m2 <- brms::brm(mpg ~ qsec + drat, data = mtcars)
+m3 <- brms::brm(mpg ~ qsec + drat + wt, data = mtcars)
-x <- brms::loo_compare(brms::add_criterion(m1, "loo"),
+x <- brms::loo_compare(
+ brms::add_criterion(m1, "loo"),
brms::add_criterion(m2, "loo"),
- model_names = c("m1", "m2")
+ brms::add_criterion(m3, "loo"),
+ model_names = c("m1", "m2", "m3")
)
report(x)
+report(x, include_IC = FALSE)
+report(x, include_ENP = TRUE)
}
\dontshow{\}) # examplesIf}
}