Skip to content

Commit

Permalink
style files
Browse files Browse the repository at this point in the history
  • Loading branch information
gavinsimpson committed Mar 6, 2024
1 parent 217abc2 commit cde53ab
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 126 deletions.
77 changes: 50 additions & 27 deletions vignettes/custom-plotting.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ eg1 <- data_sim("eg1", n = n, seed = 1)
# fit model
m <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
data = eg1, method = "REML")
data = eg1, method = "REML"
)
```

The default plot produced by `draw()` is
Expand Down Expand Up @@ -131,12 +132,15 @@ p_sx2 <- sm %>%
filter(.smooth == "s(x2)") %>%
ggplot() +
geom_rug(aes(x = x2),
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")) +
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x2),
alpha = 0.2) +
alpha = 0.2
) +
geom_point(aes(x = x2, y = `s(x2)`),
data = eg1, cex = 1.5, colour = "steelblue3") +
data = eg1, cex = 1.5, colour = "steelblue3"
) +
geom_line(aes(x = x2, y = .estimate), lwd = 1.2) +
labs(y = "Partial effect", title = "s(x2)")
p_sx2
Expand All @@ -148,38 +152,47 @@ p_sx0 <- sm %>%
filter(.smooth == "s(x0)") %>%
ggplot() +
geom_rug(aes(x = x0),
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")) +
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x0),
alpha = 0.2) +
alpha = 0.2
) +
geom_point(aes(x = x0, y = `s(x0)`),
data = eg1, cex = 1.5, colour = "steelblue3") +
data = eg1, cex = 1.5, colour = "steelblue3"
) +
geom_line(aes(x = x0, y = .estimate), lwd = 1.2) +
labs(y = "Partial effect", title = "s(x0)")
p_sx1 <- sm %>%
filter(.smooth == "s(x1)") %>%
ggplot() +
geom_rug(aes(x = x1),
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")) +
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x1),
alpha = 0.2) +
alpha = 0.2
) +
geom_point(aes(x = x1, y = `s(x1)`),
data = eg1, cex = 1.5, colour = "steelblue3") +
data = eg1, cex = 1.5, colour = "steelblue3"
) +
geom_line(aes(x = x1, y = .estimate), lwd = 1.2) +
labs(y = "Partial effect", title = "s(x1)")
p_sx3 <- sm %>%
filter(.smooth == "s(x3)") %>%
ggplot() +
geom_rug(aes(x = x3),
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")) +
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x3),
alpha = 0.2) +
alpha = 0.2
) +
geom_point(aes(x = x3, y = `s(x3)`),
data = eg1, cex = 1.5, colour = "steelblue3") +
data = eg1, cex = 1.5, colour = "steelblue3"
) +
geom_line(aes(x = x3, y = .estimate), lwd = 1.2) +
labs(y = "Partial effect", title = "s(x3)")
```
Expand All @@ -205,27 +218,37 @@ plt <- sm %>%
filter(.smooth == "s(x2)") %>%
ggplot() +
geom_rug(aes(x = x2),
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")) +
data = eg1,
sides = "b", length = grid::unit(0.02, "npc")
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, x = x2),
alpha = 0.2) +
alpha = 0.2
) +
geom_line(aes(x = x2, y = .estimate), lwd = 1.2) +
labs(y = "Partial effect", title = "s(x2)")
plt +
geom_point(aes(x = x2, y = `s(x2)`,
colour = fac), # <-- map fac to colour aesthetic
data = eg1, cex = 1.5)
geom_point(
aes(
x = x2, y = `s(x2)`,
colour = fac
), # <-- map fac to colour aesthetic
data = eg1, cex = 1.5
)
```

We can also do some simple model checking by plotting the smooth with partial residuals coloured according to one of the other covariates (we could also do this by plotting the actual residuals against covariates). In the code chunk below, we map the covariate `x1` to both the *colour* and *size* aesthetics (note we deleted `cex = 1.5` to allow the mapping to *size*)

```{r plot-partial-resids-with-colours-by-x1}
plt +
geom_point(aes(x = x2, y = `s(x2)`,
colour = x1, size = x1), # <-- map fac to colour aesthetic
data = eg1, alpha = 0.3) + # <-- deleted cex!!
geom_point(
aes(
x = x2, y = `s(x2)`,
colour = x1, size = x1
), # <-- map fac to colour aesthetic
data = eg1, alpha = 0.3
) + # <-- deleted cex!!
scale_colour_viridis_c(option = "plasma")
```

The resulting plot doesn't show any particular problems with the model because of the way the data were simulated, but hopefully this illustrates what can be possible once you use the low-level functions provided by {gratia}.
The resulting plot doesn't show any particular problems with the model because of the way the data were simulated, but hopefully this illustrates what can be possible once you use the low-level functions provided by {gratia}.
149 changes: 90 additions & 59 deletions vignettes/data-slices.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,30 +39,31 @@ The first example uses a small data set from an experimental study of the cold t
## data load and prep
data(CO2, package = "datasets")
plant <- CO2 |>
as_tibble() |>
rename(plant = Plant, type = Type, treatment = Treatment) |>
mutate(plant = factor(plant, ordered = FALSE))
as_tibble() |>
rename(plant = Plant, type = Type, treatment = Treatment) |>
mutate(plant = factor(plant, ordered = FALSE))
```

```{r plot-plant-data}
plant_ylab <- expression(CO[2] ~ uptake ~ (mu * mol ~ m^{-3}))
plant_xlab <- expression(CO[2] ~ concentration ~ (mL ~ L^{-1}))
plant |>
ggplot(aes(x = conc, y = uptake, group = plant, colour = treatment)) +
geom_point() +
geom_line() +
facet_wrap(~ type) +
labs(y = plant_ylab, x = plant_xlab, colour = "Treatment")
ggplot(aes(x = conc, y = uptake, group = plant, colour = treatment)) +
geom_point() +
geom_line() +
facet_wrap(~type) +
labs(y = plant_ylab, x = plant_xlab, colour = "Treatment")
```

One way to model these data is to allow for different smooths for all combinations of the `treatment` and `type` covariates
```{r fit-plant-gam}
plant <- plant |>
mutate(tt = fct_cross(treatment, type))
m_plant <- gam(uptake ~ treatment * type +
s(conc, by = tt, k = 6) + s(plant, bs = "re"),
data = plant, method = "REML", familly = Gamma(link = "log"))
mutate(tt = fct_cross(treatment, type))
m_plant <- gam(uptake ~ treatment * type + s(conc, by = tt, k = 6) +
s(plant, bs = "re"),
data = plant, method = "REML", familly = Gamma(link = "log")
)
overview(m_plant)
```

Expand All @@ -73,16 +74,20 @@ draw(m_plant, residuals = TRUE, scales = "fixed")

We might want to compare model fitted values for the treatment for each of the types (origins), ignoring the random effect component. For this we want to evaluate the model at a range of values of covariate `conc` for some combinations of the other factors. This is a data slice through the covariate space, which we can create using `data_slice()`. To create a data slice for `conc` for the `Quebec` `type` in the `chilled` `treatment` we would use
```{r plant-ds-1}
ds1 <- data_slice(m_plant, conc = evenly(conc, n = 100),
type = level(type, "Quebec"), treatment = level(treatment, "chilled"))
ds1 <- data_slice(m_plant,
conc = evenly(conc, n = 100),
type = level(type, "Quebec"), treatment = level(treatment, "chilled")
)
ds1
```

Notice how `data_slice()` has filled in something for the remaining covariates that we didn't mention? In this case, `data_slice()` doesn't know how `tt` was created, so it has chosen the modal level for the `tt` factor, which is not the correct choice in this case. Instead, we need to specify the correct level explicitly for `tt`
```{r plant-ds-1a}
ds1 <- data_slice(m_plant, conc = evenly(conc, n = 100),
treatment = level(treatment, "chilled"), type = level(type, "Quebec"),
tt = level(tt, "chilled:Quebec"))
ds1 <- data_slice(m_plant,
conc = evenly(conc, n = 100),
treatment = level(treatment, "chilled"), type = level(type, "Quebec"),
tt = level(tt, "chilled:Quebec")
)
ds1
```

Expand All @@ -97,43 +102,56 @@ Notice how we excluded the random effect term; even though we had to specify som
Plotting the fitted values for the data slice now only requires some simple {ggplot2} knowledge
```{r plot-plant-fitted-1}
fv1 |>
ggplot(aes(x = conc, y = .fitted)) +
geom_point(data = plant |>
filter(type == "Quebec", treatment == "chilled"),
mapping = aes(y = uptake),
alpha = 0.8, colour = "steelblue") +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
geom_line() +
labs(x = plant_xlab, y = plant_ylab,
title = expression(Estimated ~ CO[2] ~ uptake),
subtitle = "Chilled plants of the Quebec type")
ggplot(aes(x = conc, y = .fitted)) +
geom_point(
data = plant |>
filter(type == "Quebec", treatment == "chilled"),
mapping = aes(y = uptake),
alpha = 0.8, colour = "steelblue"
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
geom_line() +
labs(
x = plant_xlab, y = plant_ylab,
title = expression(Estimated ~ CO[2] ~ uptake),
subtitle = "Chilled plants of the Quebec type"
)
```

Next, let's compare the fitted effects of the treatment in the Mississippi origin plants
```{r plant-data-slice-2}
ds2 <- data_slice(m_plant, conc = evenly(conc, n = 100),
treatment = evenly(treatment), type = level(type, "Mississippi")) |>
mutate(tt = fct_cross(treatment, type, keep_empty = TRUE))
ds2 <- data_slice(m_plant,
conc = evenly(conc, n = 100),
treatment = evenly(treatment), type = level(type, "Mississippi")
) |>
mutate(tt = fct_cross(treatment, type, keep_empty = TRUE))
ds2
```

Here, we replaced the automatically-generated `tt` variable with the correctly specified call to `fct_cross()`, retaining the levels of the `type` and `treatment` factors. This insures that we have the correct combinations corresponding to the `treatment` and `type` factors but also that we preserve the original levels of the `tt` covariate we created.

We can again visualise the fitted values for this data slice
```{r draw-plant-fitted-values-2}
fitted_values(m_plant, data = ds2, scale = "response",
exclude = "s(plant)") |>
ggplot(aes(x = conc, y = .fitted, group = treatment)) +
geom_point(data = plant |> filter(type == "Mississippi"),
mapping = aes(y = uptake, colour = treatment),
alpha = 0.8) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = treatment),
alpha = 0.2) +
geom_line(aes(colour = treatment)) +
labs(x = plant_xlab, y = plant_ylab,
title = expression(Estimated ~ CO[2] ~ uptake),
subtitle = "Comparison of treatment in plants of the Mississippi type",
colour = "Treatment", fill = "Treatment")
fitted_values(m_plant,
data = ds2, scale = "response",
exclude = "s(plant)"
) |>
ggplot(aes(x = conc, y = .fitted, group = treatment)) +
geom_point(
data = plant |> filter(type == "Mississippi"),
mapping = aes(y = uptake, colour = treatment),
alpha = 0.8
) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = treatment),
alpha = 0.2
) +
geom_line(aes(colour = treatment)) +
labs(
x = plant_xlab, y = plant_ylab,
title = expression(Estimated ~ CO[2] ~ uptake),
subtitle = "Comparison of treatment in plants of the Mississippi type",
colour = "Treatment", fill = "Treatment"
)
```

When we were creating our data slices, we used some helper functions to specify covariate values for the slice. {gratia} provides several such helper functions:
Expand Down Expand Up @@ -161,7 +179,7 @@ In the second example, I'll use the bivariate example data set from {mgcv} but f
df <- data_sim("eg2", n = 1000, scale = 0.25, seed = 2)
# fit the GAM
m_biv <- gam(y ~ te(x,z), data = df, method = "REML")
m_biv <- gam(y ~ te(x, z), data = df, method = "REML")
```

The aim of the example will be to create a univariate data slice through the 2D smooth at user-specified values of `x` while holding `z` at one or more fixed values. We could visualise the effect at the smooth level, using `smooth_estimates()`, or at the response level, as we did above using `fitted_values()`.
Expand All @@ -171,10 +189,11 @@ The aim of the example will be to create a univariate data slice through the 2D
We begin by creating a slice through the data space. We also create a label at this point for a nice axis label.
```{r data-slice-biv-1}
ds3 <- data_slice(m_biv,
x = evenly(x, n = 100),
z = quantile(z, probs = 0.25))
x = evenly(x, n = 100),
z = quantile(z, probs = 0.25)
)
z_val <- with(ds3, round(quantile(z, probs = 0.25),2))
z_val <- with(ds3, round(quantile(z, probs = 0.25), 2))
ylab <- bquote(hat(f)(x, .(z_val)))
```

Expand All @@ -191,16 +210,20 @@ sm |>
ggplot(aes(x = x, y = .estimate)) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
geom_line() +
labs(title = "Evaluation of smooth te(x,z) at fixed z",
y = ylab)
labs(
title = "Evaluation of smooth te(x,z) at fixed z",
y = ylab
)
```

Note that the above interval is not the Marra and Wood (2012) interval. It doesn't include the uncertainty from the model constant term at the moment, but unless the smooth is very close to linear that shouldn't make too much difference.

This extends to multiple slices by asking for several discrete `z`
```{r data-slice-biv-2}
ds4 <- data_slice(m_biv, x = evenly(x, n = 100),
z = round(quantile(z, probs = c(0.25, 0.5, 0.75)), 2))
ds4 <- data_slice(m_biv,
x = evenly(x, n = 100),
z = round(quantile(z, probs = c(0.25, 0.5, 0.75)), 2)
)
sm <- smooth_estimates(m_biv, smooth = "te(x,z)", data = ds4) |>
add_confint() |>
Expand All @@ -209,10 +232,13 @@ sm <- smooth_estimates(m_biv, smooth = "te(x,z)", data = ds4) |>
sm |>
ggplot(aes(x = x, y = .estimate, colour = fz, group = fz)) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = fz, colour = NULL),
alpha = 0.2) +
alpha = 0.2
) +
geom_line() +
labs(title = "Evaluation of smooth te(x,z) at fixed z",
y = expression(hat(f)(x,z)), colour = "z", fill = "z")
labs(
title = "Evaluation of smooth te(x,z) at fixed z",
y = expression(hat(f)(x, z)), colour = "z", fill = "z"
)
```

### Using `fitted_samples()`
Expand All @@ -225,8 +251,10 @@ fitted_values(m_biv, data = ds3) |> # default is response scale, not link
ggplot(aes(x = x, y = .fitted)) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci), alpha = 0.2) +
geom_line() +
labs(title = "Fitted values from model",
y = expression(hat(y)))
labs(
title = "Fitted values from model",
y = expression(hat(y))
)
```

And for the multiple `z` we have
Expand All @@ -235,9 +263,12 @@ fitted_values(m_biv, data = ds4) |>
mutate(fz = factor(z)) |>
ggplot(aes(x = x, y = .fitted, colour = fz, group = fz)) +
geom_ribbon(aes(ymin = .lower_ci, ymax = .upper_ci, fill = fz, colour = NULL),
alpha = 0.2) +
alpha = 0.2
) +
geom_line() +
labs(title = "Fitted values from model",
y = expression(hat(y)), colour = "z", fill = "z")
labs(
title = "Fitted values from model",
y = expression(hat(y)), colour = "z", fill = "z"
)
```
where the only difference here is that now the model constant is included as well as its uncertainty.
Loading

0 comments on commit cde53ab

Please sign in to comment.