diff --git a/DESCRIPTION b/DESCRIPTION index 06a57648..94a5f611 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,6 +66,7 @@ Suggests: pROC, psych, scales, + splines, sjPlot, survey, rstan, diff --git a/R/mcse.R b/R/mcse.R index cb191547..b8d7d76b 100644 --- a/R/mcse.R +++ b/R/mcse.R @@ -16,6 +16,9 @@ mcse.brmsfit <- function(x, type = c("fixed", "random", "all"), ...) { #' @export mcse.stanmvreg <- function(x, type = c("fixed", "random", "all"), ...) { + # check arguments + type <- match.arg(type) + s <- summary(x) dat <- tibble::tibble( term = rownames(s), diff --git a/R/pred_vars.R b/R/pred_vars.R index 86ab28c7..a1abfc57 100644 --- a/R/pred_vars.R +++ b/R/pred_vars.R @@ -11,7 +11,7 @@ #' model, returns the model frame for fixed effects only. #' @param multi.resp Logical, if \code{TRUE} and model is a multivariate response #' model from a \code{brmsfit} object or of class \code{stanmvreg}, then a -#' list of values for each regression is returned. +#' list of values (one for each regression) is returned. #' #' @return For \code{pred_vars()} and \code{resp_var()}, the name(s) of the #' response or predictor variables from \code{x} as character vector. diff --git a/R/tidy_stan.R b/R/tidy_stan.R index c734c404..151f1810 100644 --- a/R/tidy_stan.R +++ b/R/tidy_stan.R @@ -97,19 +97,18 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( # compute HDI out.hdi <- hdi(x, prob = prob, trans = trans, type = type) - # we need names of elements, for correct removal + # get statistics nr <- bayesplot::neff_ratio(x) + # we need names of elements, for correct removal + if (inherits(x, "brmsfit")) { cnames <- make.names(names(nr)) keep <- cnames %in% out.hdi$term } else { - keep <- 1:nrow(out.hdi) + keep <- names(nr) %in% out.hdi$term } - - # compute additional statistics, like point estimate, standard errors etc. 
- nr <- nr[keep] ratio <- data.frame( term = names(nr), @@ -117,7 +116,17 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( stringsAsFactors = FALSE ) - rh <- bayesplot::rhat(x)[keep] + + rh <- bayesplot::rhat(x) + + if (inherits(x, "brmsfit")) { + cnames <- make.names(names(rh)) + keep <- cnames %in% out.hdi$term + } else { + keep <- names(rh) %in% out.hdi$term + } + + rh <- rh[keep] rhat <- data.frame( term = names(rh), rhat = rh, @@ -243,6 +252,9 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( } + ## TODO extract Sigma for stanmvreg random effects + + # find random slopes rs1 <- grep("b\\[(.*) (.*)\\]", out$term) @@ -324,7 +336,32 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( } - ## TODO add support for multivariate response model for rstanarm + + if (inherits(x, "stanmvreg")) { + + # get response variables + + responses <- resp_var(x) + resp.names <- names(responses) + + + # create "response-level" variable + + out <- tibble::add_column(out, response = "", .before = 1) + + + # copy name of response into new character variable + # and remove response name from term name + + for (i in 1:length(responses)) { + pattern <- paste0(resp.names[i], "|") + m <- tidyselect::starts_with(pattern, vars = out$term) + out$response[intersect(which(out$response == ""), m)] <- responses[i] + out$term <- gsub(pattern, "", out$term, fixed = TRUE) + } + + } + class(out) <- c("tidy_stan", class(out)) diff --git a/inst/doc/anova-statistics.html b/inst/doc/anova-statistics.html index 666c4782..3a4dbb6d 100644 --- a/inst/doc/anova-statistics.html +++ b/inst/doc/anova-statistics.html @@ -12,7 +12,7 @@ - +
For multilevel models, the type
-argument defines whether the HDIs of fixed, random or all effects are shown.
hdi(m5, type = "random")
@@ -183,14 +183,14 @@ Highest Density Interval
#> # Highest Density Interval
#>
#> HDI(90%)
-#> r_e15relat.1.Intercept. [-0.16 1.30]
-#> r_e15relat.2.Intercept. [-0.14 1.09]
-#> r_e15relat.3.Intercept. [-0.85 0.77]
-#> r_e15relat.4.Intercept. [-0.58 0.79]
-#> r_e15relat.5.Intercept. [-0.90 0.85]
-#> r_e15relat.6.Intercept. [-1.58 0.28]
-#> r_e15relat.7.Intercept. [-1.10 0.79]
-#> r_e15relat.8.Intercept. [-0.90 0.47]
The computation for the HDI is based on the code from Kruschke 2015, pp. 727f. For default sampling in Stan (4000 samples), the 90% intervals for HDI are more stable than, for instance, 95% intervals. An effective sample size of at least 10,000 is recommended if 95% intervals are to be computed (see Kruschke 2015, p. 183ff).
rope()
does not suggest limits for the region of practical equivalence and does not tell you how large a value is practically equivalent to the null value. However, there are suggestions on how to choose reasonable limits (see Kruschke 2018), which are implemented in the equi_test()
function.
For models with binary outcome, there is no concrete way to derive the effect size that defines the ROPE limits. Two examples from Kruschke suggest that a negligible change is about .05 on the logit-scale. In these cases, it is recommended to specify the rope
argument; however, if it is not specified, the ROPE limits are calculated in this way: 0 +/- .1 * sd(intercept) / 4
. For all other models, 0 +/- .1 * sd(intercept)
is used to determine the ROPE limits. These formulas are based on experience that has worked well in real-life situations, but they are most likely not the best approach in general.
Beside a numerical output, the results can also be printed as HTML-table or plotted, using the out
-argument. For plots, the 95% distributions of the posterior samples are shown, the ROPE is a light-blue shaded region in the plot, and the distributions are colored depending on whether the parameter values are accepted, rejected or undecided.
equi_test(m5, out = "plot")
Additional statistics in the output are:
To also show random effects of multilevel models, use the type
-argument.
# printing fixed and random effects of multilevel model
tidy_stan(m3, type = "all")
@@ -300,33 +300,33 @@ Tidy Summary of Bayesian Models
#>
#> ## Conditional Model: Fixed effects
#>
-#> estimate std.error HDI(89%) neff_ratio Rhat mcse
-#> Intercept 1.23 0.74 [-0.27 2.76] 0.21 1 0.03
-#> child -1.15 0.10 [-1.30 -0.99] 0.91 1 0.00
-#> camper 0.73 0.10 [ 0.57 0.87] 0.79 1 0.00
+#> estimate std.error HDI(89%) ratio rhat mcse
+#> Intercept 1.24 0.79 [-0.44 2.89] 0.04 1.03 0.08
+#> child -1.15 0.09 [-1.29 -0.99] 1.00 1.00 0.00
+#> camper 0.73 0.09 [ 0.58 0.89] 0.74 1.00 0.00
#>
#> ## Conditional Model: Random effect (Intercept: persons)
#>
-#> estimate std.error HDI(89%) neff_ratio Rhat mcse
-#> persons.1 -1.28 0.75 [-2.69 0.35] 0.22 1 0.03
-#> persons.2 -0.31 0.73 [-1.87 1.17] 0.21 1 0.03
-#> persons.3 0.40 0.74 [-1.02 2.00] 0.21 1 0.03
-#> persons.4 1.29 0.73 [-0.21 2.82] 0.21 1 0.03
+#> estimate std.error HDI(89%) ratio rhat mcse
+#> persons.1 -1.30 0.81 [-2.98 0.39] 0.04 1.03 0.09
+#> persons.2 -0.33 0.80 [-1.93 1.37] 0.04 1.03 0.08
+#> persons.3 0.38 0.79 [-1.36 1.94] 0.04 1.03 0.08
+#> persons.4 1.28 0.79 [-0.32 2.98] 0.04 1.03 0.08
#>
#> ## Zero-Inflated Model: Fixed effects
#>
-#> estimate std.error HDI(89%) neff_ratio Rhat mcse
-#> Intercept -0.71 0.74 [-2.09 0.50] 0.33 1 0.02
-#> child 1.88 0.32 [ 1.36 2.41] 0.87 1 0.01
-#> camper -0.84 0.35 [-1.40 -0.26] 0.73 1 0.01
+#> estimate std.error HDI(89%) ratio rhat mcse
+#> Intercept -0.69 0.74 [-1.99 0.51] 0.43 1 0.02
+#> child 1.90 0.32 [ 1.34 2.39] 0.73 1 0.01
+#> camper -0.83 0.35 [-1.40 -0.28] 0.83 1 0.01
#>
#> ## Zero-Inflated Model: Random effect (Intercept: persons)
#>
-#> estimate std.error HDI(89%) neff_ratio Rhat mcse
-#> persons.1 1.27 0.84 [ 0.04 2.80] 0.34 1 0.03
-#> persons.2 0.34 0.71 [-0.93 1.64] 0.31 1 0.02
-#> persons.3 -0.12 0.73 [-1.41 1.23] 0.33 1 0.02
-#> persons.4 -1.23 0.75 [-2.63 0.09] 0.33 1 0.02
By default, 89%-HDI are computed (a convention following McElreath 2015), but other or even multiple HDI can be computed using the prob
argument.
# two different HDI for multivariate response model
tidy_stan(m2, prob = c(.5, .95))
@@ -335,22 +335,22 @@ Tidy Summary of Bayesian Models
#>
#> ## Response: jobseek
#>
-#> estimate std.error HDI(50%) HDI(95%) neff_ratio Rhat mcse
-#> Intercept 3.67 0.13 [ 3.60 3.77] [ 3.43 3.92] 1 1 0
-#> treat 0.07 0.05 [ 0.03 0.10] [-0.03 0.16] 1 1 0
-#> econ_hard 0.05 0.03 [ 0.03 0.07] [ 0.00 0.10] 1 1 0
-#> sex -0.01 0.05 [-0.04 0.02] [-0.10 0.09] 1 1 0
-#> age 0.00 0.00 [ 0.00 0.01] [-0.00 0.01] 1 1 0
+#> estimate std.error HDI(50%) HDI(95%) ratio rhat mcse
+#> Intercept 3.67 0.12 [ 3.59 3.76] [ 3.43 3.93] 1 1 0
+#> treat 0.07 0.05 [ 0.03 0.10] [-0.04 0.16] 1 1 0
+#> econ_hard 0.05 0.02 [ 0.04 0.07] [ 0.01 0.10] 1 1 0
+#> sex -0.01 0.05 [-0.03 0.03] [-0.10 0.09] 1 1 0
+#> age 0.00 0.00 [ 0.00 0.01] [-0.00 0.01] 1 1 0
#>
#> ## Response: depress2
#>
-#> estimate std.error HDI(50%) HDI(95%) neff_ratio Rhat mcse
-#> Intercept 2.20 0.15 [ 2.13 2.34] [ 1.91 2.50] 1 1 0
-#> treat -0.04 0.04 [-0.07 -0.01] [-0.13 0.04] 1 1 0
-#> joseek -0.24 0.03 [-0.26 -0.22] [-0.30 -0.18] 1 1 0
-#> econ_hard 0.15 0.02 [ 0.13 0.16] [ 0.11 0.19] 1 1 0
-#> sex 0.11 0.04 [ 0.08 0.13] [ 0.03 0.19] 1 1 0
-#> age 0.00 0.00 [-0.00 0.00] [-0.00 0.00] 1 1 0
Typically, mediation()
finds the treatment and mediator variables automatically. If this does not work, use the treatment
and mediator
arguments to specify the related variable names. For all values, the 90% HDIs are calculated by default. Use prob
to calculate a different interval.
Here is a comparison with the mediation package. Note that the summary()
-output of the mediation package shows the indirect effect first, followed by the direct effect.
summary(m1)
@@ -386,10 +386,10 @@ Summary of Mediation Analysis
#> Quasi-Bayesian Confidence Intervals
#>
#> Estimate 95% CI Lower 95% CI Upper p-value
-#> ACME -0.0156 -0.0409 0.01 0.21
-#> ADE -0.0388 -0.1233 0.05 0.36
-#> Total Effect -0.0544 -0.1408 0.03 0.26
-#> Prop. Mediated 0.2292 -2.7499 2.51 0.36
+#> ACME -0.0165 -0.0416 0.01 0.17
+#> ADE -0.0366 -0.1187 0.05 0.40
+#> Total Effect -0.0530 -0.1422 0.03 0.27
+#> Prop. Mediated 0.2344 -1.8729 2.69 0.35
#>
#> Sample Size Used: 899
#>
@@ -405,11 +405,11 @@ Summary of Mediation Analysis
#> Response: depress2
#>
#> Estimate HDI (95%)
-#> Direct effect: -0.04 [-0.13 0.04]
+#> Direct effect: -0.04 [-0.12 0.04]
#> Indirect effect: -0.02 [-0.04 0.01]
#> Total effect: -0.06 [-0.14 0.04]
#>
-#> Proportion mediated: 27.31% [-183.77% 238.38%]
If you want to calculate mean instead of median values from the posterior samples, use the typical
-argument. Furthermore, there is a print()
-method, which allows printing more digits.
mediation(m2, typical = "mean", prob = .95) %>% print(digits = 4)
#>
@@ -420,11 +420,11 @@ Summary of Mediation Analysis
#> Response: depress2
#>
#> Estimate HDI (95%)
-#> Direct effect: -0.0404 [-0.1326 0.0416]
-#> Indirect effect: -0.0158 [-0.0383 0.0097]
-#> Total effect: -0.0563 [-0.1415 0.0376]
+#> Direct effect: -0.0401 [-0.1230 0.0447]
+#> Indirect effect: -0.0157 [-0.0407 0.0103]
+#> Total effect: -0.0559 [-0.1363 0.0372]
#>
-#> Proportion mediated: 28.1294% [-182.9469% 239.2057%]
As you can see, the results are similar to what the mediation package produces for non-Bayesian models.
For non-Gaussian models, there is no clean variance decomposition and hence the ICC can’t be calculated exactly. The general Bayesian way to analyse the random-effect variances is then to draw samples from the posterior predictive distribution, calculate the variances and compare how the variance across models changes when group-specific terms are included or dropped.
You can achieve this with the ppd
-argument. In this case, draws from the posterior predictive distribution not conditioned on group-level terms (using posterior_predict(..., re.form = NA)
) as well as draws from this distribution conditioned on all random effects (by default, unless specified otherwise in the re.form
-argument) are taken. Then, the variances for each of these draws are calculated. The “ICC” is then the ratio between these two variances.
icc(m4, ppd = TRUE, re.form = ~(1 | cyl), prob = .5)
@@ -472,14 +472,14 @@ ICC for multilevel models
#> Conditioned on: ~(1 | cyl)
#>
#> ## Variance Ratio (comparable to ICC)
-#> Ratio: 0.09 HDI 50%: [-0.03 0.33]
+#> Ratio: 0.12 HDI 50%: [0.04 0.40]
#>
#> ## Variances of Posterior Predicted Distribution
-#> Conditioned on fixed effects: 32.36 HDI 50%: [21.03 37.54]
-#> Conditioned on rand. effects: 36.64 HDI 50%: [25.70 41.82]
+#> Conditioned on fixed effects: 33.01 HDI 50%: [22.34 36.33]
+#> Conditioned on rand. effects: 38.86 HDI 50%: [29.30 45.07]
#>
#> ## Difference in Variances
-#> Difference: 4.27 HDI 50%: [-2.60 10.52]
Sometimes, when the variance of the posterior predictive distribution is very large, the variance ratio in the output makes no sense, e.g. because it is negative. In such cases, it might help to use a more robust measure to calculate the central tendency of the variances. This can be done with the typical
-argument.
# the "classical" ICC, not recommended for non-Gaussian
icc(m3)
@@ -489,8 +489,8 @@ ICC for multilevel models
#> Family: zero_inflated_poisson (log)
#>
#> ## persons
-#> ICC: 0.68 HDI 89%: [0.43 0.96]
-#> Between-group: 4.00 HDI 89%: [0.22 9.02]
+#> ICC: 0.69 HDI 89%: [0.42 0.96]
+#> Between-group: 4.63 HDI 89%: [0.18 10.38]
#>
#> ## Residuals
#> Within-group: 1.00 HDI 89%: [1.00 1.00]
@@ -505,14 +505,14 @@ ICC for multilevel models
#> Conditioned on: all random effects
#>
#> ## Variance Ratio (comparable to ICC)
-#> Ratio: -5.09 HDI 89%: [-0.64 1.00]
+#> Ratio: -9.80 HDI 89%: [-1.52 1.00]
#>
#> ## Variances of Posterior Predicted Distribution
-#> Conditioned on fixed effects: 240.83 HDI 89%: [ 0.02 65.39]
-#> Conditioned on rand. effects: 39.49 HDI 89%: [31.32 47.81]
+#> Conditioned on fixed effects: 421.06 HDI 89%: [ 0.03 100.25]
+#> Conditioned on rand. effects: 39.60 HDI 89%: [31.46 48.18]
#>
#> ## Difference in Variances
-#> Difference: -201.34 HDI 89%: [-26.75 49.48]
+#> Difference: -381.45 HDI 89%: [-62.89 50.83]
# use median instead of mean
icc(m3, ppd = TRUE, typical = "median")
@@ -523,25 +523,25 @@ ICC for multilevel models
#> Conditioned on: all random effects
#>
#> ## Variance Ratio (comparable to ICC)
-#> Ratio: 0.73 HDI 89%: [-0.63 1.00]
+#> Ratio: 0.71 HDI 89%: [-1.53 1.00]
#>
#> ## Variances of Posterior Predicted Distribution
-#> Conditioned on fixed effects: 10.60 HDI 89%: [ 0.00 64.63]
-#> Conditioned on rand. effects: 39.38 HDI 89%: [31.52 48.39]
+#> Conditioned on fixed effects: 11.08 HDI 89%: [ 0.02 100.88]
+#> Conditioned on rand. effects: 39.39 HDI 89%: [31.53 47.90]
#>
#> ## Difference in Variances
-#> Difference: 27.83 HDI 89%: [-28.83 49.15]
r2()
computes either the Bayes r-squared value, or - if loo = TRUE
- a LOO-adjusted r-squared value (which comes conceptually closer to an adjusted r-squared measure).
For the Bayes r-squared, the standard error is also reported. Note that r2()
uses the median as measure of central tendency and the median absolute deviation as measure for variability.
r2(m5)
-#> Bayes R2: 0.169
-#> Standard Error: 0.022
+#> Bayes R2: 0.168
+#> Standard Error: 0.021
r2(m5, loo = TRUE)
-#> LOO-adjusted R2: 0.146
# Using the t-statistics for calculating the p-value
p_value(m2)
#> term p.value std.error
-#> 1 (Intercept) 0 9.747
-#> 2 Days 0 0.804
+#> 1 (Intercept) 0 9.760
+#> 2 Days 0 0.805
# p-values based on conditional F-tests with
# Kenward-Roger approximation for the degrees of freedom
p_value(m2, p.kr = TRUE)
#> term p.value std.error
-#> 1 (Intercept) 0 9.769
+#> 1 (Intercept) 0 9.782
#> 2 Days 0 0.814
To see more details on the degrees of freedom when using Kenward-Roger approximation, use the summary()
-method:
pv <- p_value(m2, p.kr = TRUE)
summary(pv)
#> term p.value std.error df statistic
-#> 1 (Intercept) 0 9.769 22.754 25.734
-#> 2 Days 0 0.814 160.934 12.865