style files

gavinsimpson · Mar 6, 2024 · cde53ab · cde53ab
1 parent 217abc2
commit cde53ab
Show file tree

Hide file tree

Showing 4 changed files with 209 additions and 126 deletions.
diff --git a/vignettes/custom-plotting.Rmd b/vignettes/custom-plotting.Rmd
@@ -42,7 +42,8 @@ eg1 <- data_sim("eg1", n = n, seed = 1)
 
 # fit model
 m <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
-         data = eg1, method = "REML")
+  data = eg1, method = "REML"
+)
 ```
 
 The default plot produced by `draw()` is
@@ -131,12 +132,15 @@ p_sx2 <- sm %>%
   filter(.smooth == "s(x2)") %>%
   ggplot() +
   geom_rug(aes(x = x2),
-           data = eg1,
-           sides = "b", length = grid::unit(0.02, "npc")) +
+    data = eg1,
+    sides = "b", length = grid::unit(0.02, "npc")
+  ) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x2),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_point(aes(x = x2, y = `s(x2)`),
-             data = eg1, cex = 1.5, colour = "steelblue3") +
+    data = eg1, cex = 1.5, colour = "steelblue3"
+  ) +
   geom_line(aes(x = x2, y = .estimate), lwd = 1.2) +
   labs(y = "Partial effect", title = "s(x2)")
 p_sx2
@@ -148,38 +152,47 @@ p_sx0 <- sm %>%
   filter(.smooth == "s(x0)") %>%
   ggplot() +
   geom_rug(aes(x = x0),
-           data = eg1,
-           sides = "b", length = grid::unit(0.02, "npc")) +
+    data = eg1,
+    sides = "b", length = grid::unit(0.02, "npc")
+  ) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x0),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_point(aes(x = x0, y = `s(x0)`),
-             data = eg1, cex = 1.5, colour = "steelblue3") +
+    data = eg1, cex = 1.5, colour = "steelblue3"
+  ) +
   geom_line(aes(x = x0, y = .estimate), lwd = 1.2) +
   labs(y = "Partial effect", title = "s(x0)")
 
 p_sx1 <- sm %>%
   filter(.smooth == "s(x1)") %>%
   ggplot() +
   geom_rug(aes(x = x1),
-           data = eg1,
-           sides = "b", length = grid::unit(0.02, "npc")) +
+    data = eg1,
+    sides = "b", length = grid::unit(0.02, "npc")
+  ) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x1),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_point(aes(x = x1, y = `s(x1)`),
-             data = eg1, cex = 1.5, colour = "steelblue3") +
+    data = eg1, cex = 1.5, colour = "steelblue3"
+  ) +
   geom_line(aes(x = x1, y = .estimate), lwd = 1.2) +
   labs(y = "Partial effect", title = "s(x1)")
 
 p_sx3 <- sm %>%
   filter(.smooth == "s(x3)") %>%
   ggplot() +
   geom_rug(aes(x = x3),
-           data = eg1,
-           sides = "b", length = grid::unit(0.02, "npc")) +
+    data = eg1,
+    sides = "b", length = grid::unit(0.02, "npc")
+  ) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x3),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_point(aes(x = x3, y = `s(x3)`),
-             data = eg1, cex = 1.5, colour = "steelblue3") +
+    data = eg1, cex = 1.5, colour = "steelblue3"
+  ) +
   geom_line(aes(x = x3, y = .estimate), lwd = 1.2) +
   labs(y = "Partial effect", title = "s(x3)")
 ```
@@ -205,27 +218,37 @@ plt <- sm %>%
   filter(.smooth == "s(x2)") %>%
   ggplot() +
   geom_rug(aes(x = x2),
-           data = eg1,
-           sides = "b", length = grid::unit(0.02, "npc")) +
+    data = eg1,
+    sides = "b", length = grid::unit(0.02, "npc")
+  ) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x2),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_line(aes(x = x2, y = .estimate), lwd = 1.2) +
   labs(y = "Partial effect", title = "s(x2)")
 
 plt +
-  geom_point(aes(x = x2, y = `s(x2)`,
-                 colour = fac), # <-- map fac to colour aesthetic
-             data = eg1, cex = 1.5)
+  geom_point(
+    aes(
+      x = x2, y = `s(x2)`,
+      colour = fac
+    ), # <-- map fac to colour aesthetic
+    data = eg1, cex = 1.5
+  )
 ```
 
 We can also do some simple model checking by plotting the smooth with partial residuals coloured according to one of the other covariates (we could also do this by plotting the actual residuals against covariates). In the code chunk below, we map the covariate `x1` to both the *colour* and *size* aesthetics (note we deleted `cex = 1.5` to allow the mapping to *size*)
 
 ```{r plot-partial-resids-with-colours-by-x1}
 plt +
-  geom_point(aes(x = x2, y = `s(x2)`,
-                 colour = x1, size = x1), # <-- map fac to colour aesthetic
-             data = eg1, alpha = 0.3) +   # <-- deleted cex!!
+  geom_point(
+    aes(
+      x = x2, y = `s(x2)`,
+      colour = x1, size = x1
+    ), # <-- map fac to colour aesthetic
+    data = eg1, alpha = 0.3
+  ) + # <-- deleted cex!!
   scale_colour_viridis_c(option = "plasma")
 ```
 
-The resulting plot doesn't show any particular problems with the model because of the way the data were simulated, but hopefully this illustrates what can be possible once you use the low-level functions provided by {gratia}.
+The resulting plot doesn't show any particular problems with the model because of the way the data were simulated, but hopefully this illustrates what can be possible once you use the low-level functions provided by {gratia}.
diff --git a/vignettes/data-slices.Rmd b/vignettes/data-slices.Rmd
@@ -39,30 +39,31 @@ The first example uses a small data set from an experimental study of the cold t
 ## data load and prep
 data(CO2, package = "datasets")
 plant <- CO2 |>
-    as_tibble() |>
-    rename(plant = Plant, type = Type, treatment = Treatment) |>
-    mutate(plant = factor(plant, ordered = FALSE))
+  as_tibble() |>
+  rename(plant = Plant, type = Type, treatment = Treatment) |>
+  mutate(plant = factor(plant, ordered = FALSE))
 ```
 
 ```{r plot-plant-data}
 plant_ylab <- expression(CO[2] ~ uptake ~ (mu * mol ~ m^{-3}))
 plant_xlab <- expression(CO[2] ~ concentration ~ (mL ~ L^{-1}))
 
 plant |>
-    ggplot(aes(x = conc, y = uptake, group = plant, colour = treatment)) +
-    geom_point() +
-    geom_line() +
-    facet_wrap(~ type) +
-    labs(y = plant_ylab, x = plant_xlab, colour = "Treatment")
+  ggplot(aes(x = conc, y = uptake, group = plant, colour = treatment)) +
+  geom_point() +
+  geom_line() +
+  facet_wrap(~type) +
+  labs(y = plant_ylab, x = plant_xlab, colour = "Treatment")
 ```
 
 One way to model these data is to allow for different smooths for all combinations of the `treatment` and `type` covariates
 ```{r fit-plant-gam}
 plant <- plant |>
-    mutate(tt = fct_cross(treatment, type))
-m_plant <- gam(uptake ~ treatment * type +
-                 s(conc, by = tt, k = 6) + s(plant, bs = "re"),
-    data = plant, method = "REML", familly = Gamma(link = "log"))
+  mutate(tt = fct_cross(treatment, type))
+m_plant <- gam(uptake ~ treatment * type + s(conc, by = tt, k = 6) +
+    s(plant, bs = "re"),
+  data = plant, method = "REML", familly = Gamma(link = "log")
+)
 overview(m_plant)
 ```
 
@@ -73,16 +74,20 @@ draw(m_plant, residuals = TRUE, scales = "fixed")
 
 We might want to compare model fitted values for the treatment for each of the types (origins), ignoring the random effect component. For this we want to evaluate the model at a range of values of covariate `conc` for some combinations of the other factors. This is a data slice through the covariate space, which we can create using `data_slice()`. To create a data slice for `conc` for the `Quebec` `type` in the `chilled` `treatment` we would use
 ```{r plant-ds-1}
-ds1 <- data_slice(m_plant, conc = evenly(conc, n = 100),
-    type = level(type, "Quebec"), treatment = level(treatment, "chilled"))
+ds1 <- data_slice(m_plant,
+  conc = evenly(conc, n = 100),
+  type = level(type, "Quebec"), treatment = level(treatment, "chilled")
+)
 ds1
 ```
 
 Notice how `data_slice()` has filled in something for the remaining covariates that we didn't mention? In this case, `data_slice()` doesn't know how `tt` was created, so it has chosen the modal level for the `tt` factor, which is not the correct choice in this case. Instead, we need to specify the correct level explicitly for `tt`
 ```{r plant-ds-1a}
-ds1 <- data_slice(m_plant, conc = evenly(conc, n = 100),
-    treatment = level(treatment, "chilled"), type = level(type, "Quebec"),
-    tt = level(tt, "chilled:Quebec"))
+ds1 <- data_slice(m_plant,
+  conc = evenly(conc, n = 100),
+  treatment = level(treatment, "chilled"), type = level(type, "Quebec"),
+  tt = level(tt, "chilled:Quebec")
+)
 ds1
 ```
 
@@ -97,43 +102,56 @@ Notice how we excluded the random effect term; even though we had to specify som
 Plotting the fitted values for the data slice now only requires some simple {ggplot2} knowledge
 ```{r plot-plant-fitted-1}
 fv1 |>
-    ggplot(aes(x = conc, y = .fitted)) +
-    geom_point(data = plant |>
-                 filter(type == "Quebec", treatment == "chilled"),
-        mapping = aes(y = uptake),
-        alpha = 0.8, colour = "steelblue") +
-    geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
-    geom_line() +
-    labs(x = plant_xlab, y = plant_ylab,
-        title = expression(Estimated ~ CO[2] ~ uptake),
-        subtitle = "Chilled plants of the Quebec type")
+  ggplot(aes(x = conc, y = .fitted)) +
+  geom_point(
+    data = plant |>
+      filter(type == "Quebec", treatment == "chilled"),
+    mapping = aes(y = uptake),
+    alpha = 0.8, colour = "steelblue"
+  ) +
+  geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
+  geom_line() +
+  labs(
+    x = plant_xlab, y = plant_ylab,
+    title = expression(Estimated ~ CO[2] ~ uptake),
+    subtitle = "Chilled plants of the Quebec type"
+  )
 ```
 
 Next, let's compare the fitted effects of the treatment in the Mississippi origin plants
 ```{r plant-data-slice-2}
-ds2 <- data_slice(m_plant, conc = evenly(conc, n = 100),
-    treatment = evenly(treatment), type = level(type, "Mississippi")) |>
-    mutate(tt = fct_cross(treatment, type, keep_empty = TRUE))
+ds2 <- data_slice(m_plant,
+  conc = evenly(conc, n = 100),
+  treatment = evenly(treatment), type = level(type, "Mississippi")
+) |>
+  mutate(tt = fct_cross(treatment, type, keep_empty = TRUE))
 ds2
 ```
 
 Here, we replaced the automatically-generated `tt` variable with the correctly specified call to `fct_cross()`, retaining the levels of the `type` and `treatment` factors. This insures that we have the correct combinations corresponding to the `treatment` and `type` factors but also that we preserve the original levels of the `tt` covariate we created.
 
 We can again visualise the fitted values for this data slice
 ```{r draw-plant-fitted-values-2}
-fitted_values(m_plant, data = ds2, scale = "response",
-    exclude = "s(plant)") |>
-    ggplot(aes(x = conc, y = .fitted, group = treatment)) +
-    geom_point(data = plant |> filter(type == "Mississippi"),
-        mapping = aes(y = uptake, colour = treatment),
-        alpha = 0.8) +
-    geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = treatment),
-        alpha = 0.2) +
-    geom_line(aes(colour = treatment)) +
-    labs(x = plant_xlab, y = plant_ylab,
-        title = expression(Estimated ~ CO[2] ~ uptake),
-        subtitle = "Comparison of treatment in plants of the Mississippi type",
-        colour = "Treatment", fill = "Treatment")
+fitted_values(m_plant,
+  data = ds2, scale = "response",
+  exclude = "s(plant)"
+) |>
+  ggplot(aes(x = conc, y = .fitted, group = treatment)) +
+  geom_point(
+    data = plant |> filter(type == "Mississippi"),
+    mapping = aes(y = uptake, colour = treatment),
+    alpha = 0.8
+  ) +
+  geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = treatment),
+    alpha = 0.2
+  ) +
+  geom_line(aes(colour = treatment)) +
+  labs(
+    x = plant_xlab, y = plant_ylab,
+    title = expression(Estimated ~ CO[2] ~ uptake),
+    subtitle = "Comparison of treatment in plants of the Mississippi type",
+    colour = "Treatment", fill = "Treatment"
+  )
 ```
 
 When we were creating our data slices, we used some helper functions to specify covariate values for the slice. {gratia} provides several such helper functions:
@@ -161,7 +179,7 @@ In the second example, I'll use the bivariate example data set from {mgcv} but f
 df <- data_sim("eg2", n = 1000, scale = 0.25, seed = 2)
 
 # fit the GAM
-m_biv <- gam(y ~ te(x,z), data = df, method = "REML")
+m_biv <- gam(y ~ te(x, z), data = df, method = "REML")
 ```
 
 The aim of the example will be to create a univariate data slice through the 2D smooth at user-specified values of `x` while holding `z` at one or more fixed values. We could visualise the effect at the smooth level, using `smooth_estimates()`, or at the response level, as we did above using `fitted_values()`.
@@ -171,10 +189,11 @@ The aim of the example will be to create a univariate data slice through the 2D
 We begin by creating a slice through the data space. We also create a label at this point for a nice axis label.
 ```{r data-slice-biv-1}
 ds3 <- data_slice(m_biv,
-                  x = evenly(x, n = 100),
-                  z = quantile(z, probs = 0.25))
+  x = evenly(x, n = 100),
+  z = quantile(z, probs = 0.25)
+)
 
-z_val <- with(ds3, round(quantile(z, probs = 0.25),2))
+z_val <- with(ds3, round(quantile(z, probs = 0.25), 2))
 ylab <- bquote(hat(f)(x, .(z_val)))
 ```
 
@@ -191,16 +210,20 @@ sm |>
   ggplot(aes(x = x, y = .estimate)) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
   geom_line() +
-  labs(title = "Evaluation of smooth te(x,z) at fixed z",
-       y = ylab)
+  labs(
+    title = "Evaluation of smooth te(x,z) at fixed z",
+    y = ylab
+  )
 ```
 
 Note that the above interval is not the Marra and Wood (2012) interval. It doesn't include the uncertainty from the model constant term at the moment, but unless the smooth is very close to linear that shouldn't make too much difference.
 
 This extends to multiple slices by asking for several discrete `z`
 ```{r data-slice-biv-2}
-ds4 <- data_slice(m_biv, x = evenly(x, n = 100),
-                  z = round(quantile(z, probs = c(0.25, 0.5, 0.75)), 2))
+ds4 <- data_slice(m_biv,
+  x = evenly(x, n = 100),
+  z = round(quantile(z, probs = c(0.25, 0.5, 0.75)), 2)
+)
 
 sm <- smooth_estimates(m_biv, smooth = "te(x,z)", data = ds4) |>
   add_confint() |>
@@ -209,10 +232,13 @@ sm <- smooth_estimates(m_biv, smooth = "te(x,z)", data = ds4) |>
 sm |>
   ggplot(aes(x = x, y = .estimate, colour = fz, group = fz)) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = fz, colour = NULL),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_line() +
-  labs(title = "Evaluation of smooth te(x,z) at fixed z",
-       y = expression(hat(f)(x,z)), colour = "z", fill = "z")
+  labs(
+    title = "Evaluation of smooth te(x,z) at fixed z",
+    y = expression(hat(f)(x, z)), colour = "z", fill = "z"
+  )
 ```
 
 ### Using `fitted_samples()`
@@ -225,8 +251,10 @@ fitted_values(m_biv, data = ds3) |> # default is response scale, not link
   ggplot(aes(x = x, y = .fitted)) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
   geom_line() +
-  labs(title = "Fitted values from model",
-       y = expression(hat(y)))
+  labs(
+    title = "Fitted values from model",
+    y = expression(hat(y))
+  )
 ```
 
 And for the multiple `z` we have
@@ -235,9 +263,12 @@ fitted_values(m_biv, data = ds4) |>
   mutate(fz = factor(z)) |>
   ggplot(aes(x = x, y = .fitted, colour = fz, group = fz)) +
   geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = fz, colour = NULL),
-              alpha = 0.2) +
+    alpha = 0.2
+  ) +
   geom_line() +
-  labs(title = "Fitted values from model",
-       y = expression(hat(y)), colour = "z", fill = "z")
+  labs(
+    title = "Fitted values from model",
+    y = expression(hat(y)), colour = "z", fill = "z"
+  )
 ```
 where the only difference here is that now the model constant is included as well as its uncertainty.