diff --git a/DESCRIPTION b/DESCRIPTION index 06a57648..94a5f611 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,6 +66,7 @@ Suggests: pROC, psych, scales, + splines, sjPlot, survey, rstan, diff --git a/R/mcse.R b/R/mcse.R index cb191547..b8d7d76b 100644 --- a/R/mcse.R +++ b/R/mcse.R @@ -16,6 +16,9 @@ mcse.brmsfit <- function(x, type = c("fixed", "random", "all"), ...) { #' @export mcse.stanmvreg <- function(x, type = c("fixed", "random", "all"), ...) { + # check arguments + type <- match.arg(type) + s <- summary(x) dat <- tibble::tibble( term = rownames(s), diff --git a/R/pred_vars.R b/R/pred_vars.R index 86ab28c7..a1abfc57 100644 --- a/R/pred_vars.R +++ b/R/pred_vars.R @@ -11,7 +11,7 @@ #' model, returns the model frame for fixed effects only. #' @param multi.resp Logical, if \code{TRUE} and model is a multivariate response #' model from a \code{brmsfit} object or of class \code{stanmvreg}, then a -#' list of values for each regression is returned. +#' list of values (one for each regression) is returned. #' #' @return For \code{pred_vars()} and \code{resp_var()}, the name(s) of the #' response or predictor variables from \code{x} as character vector. diff --git a/R/tidy_stan.R b/R/tidy_stan.R index c734c404..151f1810 100644 --- a/R/tidy_stan.R +++ b/R/tidy_stan.R @@ -97,19 +97,18 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( # compute HDI out.hdi <- hdi(x, prob = prob, trans = trans, type = type) - # we need names of elements, for correct removal + # get statistics nr <- bayesplot::neff_ratio(x) + # we need names of elements, for correct removal + if (inherits(x, "brmsfit")) { cnames <- make.names(names(nr)) keep <- cnames %in% out.hdi$term } else { - keep <- 1:nrow(out.hdi) + keep <- names(nr) %in% out.hdi$term } - - # compute additional statistics, like point estimate, standard errors etc. 
- nr <- nr[keep] ratio <- data.frame( term = names(nr), @@ -117,7 +116,17 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( stringsAsFactors = FALSE ) - rh <- bayesplot::rhat(x)[keep] + + rh <- bayesplot::rhat(x) + + if (inherits(x, "brmsfit")) { + cnames <- make.names(names(rh)) + keep <- cnames %in% out.hdi$term + } else { + keep <- names(rh) %in% out.hdi$term + } + + rh <- rh[keep] rhat <- data.frame( term = names(rh), rhat = rh, @@ -243,6 +252,9 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( } + ## TODO extract Sigma for stanmvreg random effects + + # find random slopes rs1 <- grep("b\\[(.*) (.*)\\]", out$term) @@ -324,7 +336,32 @@ tidy_stan <- function(x, prob = .89, typical = "median", trans = NULL, type = c( } - ## TODO add support for multivariate response model for rstanarm + + if (inherits(x, "stanmvreg")) { + + # get response variables + + responses <- resp_var(x) + resp.names <- names(responses) + + + # create "response-level" variable + + out <- tibble::add_column(out, response = "", .before = 1) + + + # copy name of response into new character variable + # and remove response name from term name + + for (i in 1:length(responses)) { + pattern <- paste0(resp.names[i], "|") + m <- tidyselect::starts_with(pattern, vars = out$term) + out$response[intersect(which(out$response == ""), m)] <- responses[i] + out$term <- gsub(pattern, "", out$term, fixed = TRUE) + } + + } + class(out) <- c("tidy_stan", class(out)) diff --git a/inst/doc/anova-statistics.html b/inst/doc/anova-statistics.html index 666c4782..3a4dbb6d 100644 --- a/inst/doc/anova-statistics.html +++ b/inst/doc/anova-statistics.html @@ -12,7 +12,7 @@ - + Statistics for Anova Tables @@ -70,7 +70,7 @@

Statistics for Anova Tables

Daniel Lüdecke

-

2018-07-08

+

2018-07-09

@@ -194,9 +194,9 @@

Confidence Intervals

#> # A tibble: 3 x 4 #> term partial.omegasq conf.low conf.high #> <chr> <dbl> <dbl> <dbl> -#> 1 e42dep 0.278 0.228 0.338 -#> 2 c172code 0.00547 -0.00610 0.0224 -#> 3 c160age 0.0649 0.0315 0.104 +#> 1 e42dep 0.278 0.223 0.332 +#> 2 c172code 0.00547 -0.00671 0.0221 +#> 3 c160age 0.0649 0.0348 0.0997

References

diff --git a/inst/doc/bayesian-statistics.html b/inst/doc/bayesian-statistics.html index 0026b308..f202276c 100644 --- a/inst/doc/bayesian-statistics.html +++ b/inst/doc/bayesian-statistics.html @@ -12,7 +12,7 @@ - + Statistics for Bayesian Models @@ -70,7 +70,7 @@

Statistics for Bayesian Models

Daniel Lüdecke

-

2018-07-08

+

2018-07-09

@@ -145,18 +145,18 @@

Highest Density Interval

#> # Highest Density Interval #> #> HDI(90%) -#> b_jobseek_Intercept [ 3.47 3.88] -#> b_depress2_Intercept [ 1.95 2.45] -#> b_jobseek_treat [-0.02 0.15] +#> b_jobseek_Intercept [ 3.46 3.87] +#> b_depress2_Intercept [ 1.97 2.46] +#> b_jobseek_treat [-0.02 0.16] #> b_jobseek_econ_hard [ 0.01 0.09] -#> b_jobseek_sex [-0.10 0.07] +#> b_jobseek_sex [-0.08 0.08] #> b_jobseek_age [ 0.00 0.01] #> b_depress2_treat [-0.11 0.03] #> b_depress2_job_seek [-0.28 -0.19] #> b_depress2_econ_hard [ 0.11 0.18] -#> b_depress2_sex [ 0.04 0.18] +#> b_depress2_sex [ 0.04 0.17] #> b_depress2_age [-0.00 0.00] -#> sigma_jobseek [ 0.70 0.76] +#> sigma_jobseek [ 0.70 0.75] #> sigma_depress2 [ 0.59 0.64] hdi(m2, prob = c(.5, .89)) @@ -164,18 +164,18 @@

Highest Density Interval

#> # Highest Density Interval #> #> HDI(50%) HDI(89%) -#> b_jobseek_Intercept [ 3.60 3.77] [ 3.48 3.88] -#> b_depress2_Intercept [ 2.13 2.34] [ 1.96 2.45] -#> b_jobseek_treat [ 0.03 0.10] [-0.01 0.15] -#> b_jobseek_econ_hard [ 0.03 0.07] [ 0.01 0.09] -#> b_jobseek_sex [-0.04 0.02] [-0.09 0.07] +#> b_jobseek_Intercept [ 3.59 3.76] [ 3.47 3.87] +#> b_depress2_Intercept [ 2.10 2.30] [ 1.97 2.45] +#> b_jobseek_treat [ 0.03 0.10] [-0.02 0.15] +#> b_jobseek_econ_hard [ 0.04 0.07] [ 0.02 0.09] +#> b_jobseek_sex [-0.03 0.03] [-0.08 0.07] #> b_jobseek_age [ 0.00 0.01] [ 0.00 0.01] #> b_depress2_treat [-0.07 -0.01] [-0.11 0.03] #> b_depress2_job_seek [-0.26 -0.22] [-0.28 -0.19] -#> b_depress2_econ_hard [ 0.13 0.16] [ 0.11 0.18] -#> b_depress2_sex [ 0.08 0.13] [ 0.04 0.17] +#> b_depress2_econ_hard [ 0.13 0.16] [ 0.12 0.18] +#> b_depress2_sex [ 0.07 0.13] [ 0.04 0.17] #> b_depress2_age [-0.00 0.00] [-0.00 0.00] -#> sigma_jobseek [ 0.71 0.74] [ 0.70 0.76] +#> sigma_jobseek [ 0.72 0.74] [ 0.70 0.75] #> sigma_depress2 [ 0.60 0.62] [ 0.59 0.64]

For multilevel models, the type-argument defines whether the HDI of fixed, random or all effects are shown.

hdi(m5, type = "random")
@@ -183,14 +183,14 @@ 

Highest Density Interval

#> # Highest Density Interval #> #> HDI(90%) -#> r_e15relat.1.Intercept. [-0.16 1.30] -#> r_e15relat.2.Intercept. [-0.14 1.09] -#> r_e15relat.3.Intercept. [-0.85 0.77] -#> r_e15relat.4.Intercept. [-0.58 0.79] -#> r_e15relat.5.Intercept. [-0.90 0.85] -#> r_e15relat.6.Intercept. [-1.58 0.28] -#> r_e15relat.7.Intercept. [-1.10 0.79] -#> r_e15relat.8.Intercept. [-0.90 0.47]
+#> r_e15relat.1.Intercept. [-0.11 1.41] +#> r_e15relat.2.Intercept. [-0.18 1.07] +#> r_e15relat.3.Intercept. [-0.91 0.86] +#> r_e15relat.4.Intercept. [-0.59 0.84] +#> r_e15relat.5.Intercept. [-0.91 0.85] +#> r_e15relat.6.Intercept. [-1.59 0.29] +#> r_e15relat.7.Intercept. [-1.06 0.97] +#> r_e15relat.8.Intercept. [-0.98 0.49]

The computation for the HDI is based on the code from Kruschke 2015, p. 727f. For default sampling in Stan (4000 samples), the 90% intervals for HDI are more stable than, for instance, 95% intervals. An effective sample size of at least 10,000 is recommended if 95% intervals should be computed (see Kruschke 2015, pp. 183ff).

@@ -203,12 +203,12 @@

Region of Practical Equivalence (ROPE)

#> #> inside outside #> b_Intercept 0.0% 100.0% -#> b_e42dep2 40.8% 59.2% -#> b_e42dep3 0.4% 99.6% +#> b_e42dep2 43.8% 56.2% +#> b_e42dep3 0.5% 99.5% #> b_e42dep4 0.0% 100.0% #> b_c12hour 100.0% 0.0% -#> b_c172code2 99.4% 0.6% -#> b_c172code3 77.2% 22.8% +#> b_c172code2 99.5% 0.5% +#> b_c172code3 78.8% 21.2% #> sigma 0.0% 100.0%

rope() does not suggest limits for the region of practical equivalence and does not tell you how big a difference is still practically equivalent to the null value. However, there are suggestions on how to choose reasonable limits (see Kruschke 2018), which are implemented in the equi_test() function.

@@ -225,21 +225,21 @@

Test for Practical Equivalence

#> ROPE: [-0.39 0.39] #> Samples: 4000 #> -#> H0 %inROPE HDI(95%) -#> b_Intercept (*) reject 0.00 [ 7.55 9.80] -#> b_e42dep2 (*) undecided 7.78 [ 0.10 2.04] -#> b_e42dep3 (*) reject 0.00 [ 1.35 3.28] -#> b_e42dep4 (*) reject 0.00 [ 2.81 4.91] -#> b_c12hour accept 100.00 [ 0.00 0.01] -#> b_c172code2 (*) undecided 72.35 [-0.46 0.78] -#> b_c172code3 (*) undecided 21.73 [-0.04 1.49] -#> sigma (*) reject 0.00 [ 3.42 3.76] +#> H0 %inROPE HDI(95%) +#> b_Intercept (*) reject 0.00 [ 7.43 10.09] +#> b_e42dep2 (*) undecided 8.53 [ 0.09 2.07] +#> b_e42dep3 (*) reject 0.00 [ 1.32 3.28] +#> b_e42dep4 (*) reject 0.00 [ 2.78 4.89] +#> b_c12hour accept 100.00 [ 0.00 0.01] +#> b_c172code2 (*) undecided 72.80 [-0.46 0.78] +#> b_c172code3 (*) undecided 21.77 [-0.08 1.45] +#> sigma reject 0.00 [ 3.41 3.75] #> #> (*) the number of effective samples may be insufficient for some parameters

For models with a binary outcome, there is no concrete way to derive the effect size that defines the ROPE limits. Two examples from Kruschke suggest that a negligible change is about .05 on the logit-scale. In these cases, it is recommended to specify the rope argument; however, if it is not specified, the ROPE limits are calculated in this way: 0 +/- .1 * sd(intercept) / 4. For all other models, 0 +/- .1 * sd(intercept) is used to determine the ROPE limits. These formulas are based on experience and have worked well in real-life situations, but are most likely not generally the best approach.

Besides a numerical output, the results can also be printed as an HTML table or plotted, using the out-argument. For plots, the 95% distributions of the posterior samples are shown, the ROPE is a light-blue shaded region in the plot, and the distributions are colored depending on whether the parameter values are accepted, rejected or undecided.

equi_test(m5, out = "plot")
-

+

Tidy Summary of Bayesian Models

@@ -256,17 +256,17 @@

Tidy Summary of Bayesian Models

#> #> ## Conditional Model: #> -#> estimate std.error HDI(89%) neff_ratio Rhat mcse -#> Intercept 1.23 0.74 [-0.27 2.76] 0.21 1 0.03 -#> child -1.15 0.10 [-1.30 -0.99] 0.91 1 0.00 -#> camper 0.73 0.10 [ 0.57 0.87] 0.79 1 0.00 +#> estimate std.error HDI(89%) ratio rhat mcse +#> Intercept 1.24 0.79 [-0.44 2.89] 0.04 1.03 0.08 +#> child -1.15 0.09 [-1.29 -0.99] 1.00 1.00 0.00 +#> camper 0.73 0.09 [ 0.58 0.89] 0.74 1.00 0.00 #> #> ## Zero-Inflated Model: #> -#> estimate std.error HDI(89%) neff_ratio Rhat mcse -#> Intercept -0.71 0.74 [-2.09 0.50] 0.33 1 0.02 -#> child 1.88 0.32 [ 1.36 2.41] 0.87 1 0.01 -#> camper -0.84 0.35 [-1.40 -0.26] 0.73 1 0.01
+#> estimate std.error HDI(89%) ratio rhat mcse +#> Intercept -0.69 0.74 [-1.99 0.51] 0.43 1 0.02 +#> child 1.90 0.32 [ 1.34 2.39] 0.73 1 0.01 +#> camper -0.83 0.35 [-1.40 -0.28] 0.83 1 0.01

Additional statistics in the output are: