-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #60 from ScottClaessens/48-function-to-predict-an-…
…evolutionary-time-series-given-some-initial-values 48 function to predict an evolutionary time series given some initial values
- Loading branch information
Showing
8 changed files
with
691 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
#' Plot a predicted coevolutionary time series from a fitted \code{coevfit} | ||
#' object | ||
#' | ||
#' This function plots a predicted coevolutionary time series using the | ||
#' estimated parameters from a fitted \code{coevfit} model. By default, the | ||
#' plot uses the posterior ancestral states estimated by the model as the | ||
#' starting values, but users can also set their own starting values for | ||
#' traits. Plots can be generated with or without stochastic drift. For more | ||
#' details on the underlying predictive function, see | ||
#' \code{help(coev_pred_series)}. | ||
#' | ||
#' @param object An object of class \code{coevfit} | ||
#' @param prob A value between 0 and 1 indicating the desired probability | ||
#' to be covered by the uncertainty intervals. The default is 0.95. | ||
#' @param ... Additional arguments passed to \code{\link{coev_pred_series}} | ||
#' | ||
#' @return A ggplot object. | ||
#' | ||
#' @author Scott Claessens \email{scott.claessens@@gmail.com}, Erik Ringen | ||
#' \email{erikjacob.ringen@@uzh.ch} | ||
#' | ||
#' @seealso \code{\link{coev_pred_series}} | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # fit dynamic coevolutionary model | ||
#' fit <- coev_fit( | ||
#' data = authority$data, | ||
#' variables = list( | ||
#' political_authority = "ordered_logistic", | ||
#' religious_authority = "ordered_logistic" | ||
#' ), | ||
#' id = "language", | ||
#' tree = authority$phylogeny, | ||
#' # additional arguments for cmdstanr::sample() | ||
#' chains = 4, | ||
#' parallel_chains = 4, | ||
#' seed = 1 | ||
#' ) | ||
#' | ||
#' # simulated trait co-evolution | ||
#' coev_plot_pred_series( | ||
#' object = fit, | ||
#' stochastic = TRUE | ||
#' ) | ||
#' | ||
#' # expected trait co-evolution, no drift | ||
#' coev_plot_pred_series( | ||
#' object = fit, | ||
#' stochastic = FALSE | ||
#' ) | ||
#' } | ||
#' | ||
#' @export | ||
coev_plot_pred_series <- function(object, prob = 0.95, ...) { | ||
# stop if object not of class coevfit | ||
if (!methods::is(object, "coevfit")) { | ||
stop2( | ||
paste0( | ||
"Argument 'object' must be a fitted coevolutionary model ", | ||
"of class coevfit." | ||
) | ||
) | ||
} | ||
# stop if prob is outside of range 0 - 1 | ||
if (prob <= 0 | prob >= 1) { | ||
stop2("Argument 'prob' is not between 0 and 1.") | ||
} | ||
# user-supplied arguments | ||
user_args <- list(...) | ||
# default arguments from coev_pred_series | ||
default_args <- as.list(formals(coev_pred_series)) | ||
combined_args <- utils::modifyList(default_args, user_args) | ||
combined_args$object <- object | ||
preds <- do.call(coev_pred_series, combined_args) | ||
# create plot | ||
if (combined_args$stochastic == FALSE) { | ||
# get CI quantiles | ||
probs <- c(((1 - prob) / 2), 1 - ((1 - prob) / 2)) | ||
# get expected values in cube format | ||
epreds_long <- | ||
preds |> | ||
cubelyr::as.tbl_cube( | ||
met_name = "est" | ||
) |> | ||
as.data.frame() | ||
# summarise expected values | ||
epreds_summary <- | ||
epreds_long |> | ||
dplyr::group_by(.data$response, .data$time) |> | ||
dplyr::summarise( | ||
mean = mean(.data$est), | ||
lower_CI = stats::quantile(.data$est, probs[1]), | ||
upper_CI = stats::quantile(.data$est, probs[2]) | ||
) | ||
# plot | ||
p <- | ||
ggplot2::ggplot( | ||
epreds_summary, | ||
ggplot2::aes( | ||
x = .data$time, | ||
y = .data$mean, | ||
fill = .data$response, | ||
color = .data$response, | ||
linetype = .data$response | ||
) | ||
) + | ||
ggplot2::geom_line(lwd = 1) + | ||
ggplot2::geom_ribbon( | ||
ggplot2::aes( | ||
ymin = .data$lower_CI, | ||
ymax = .data$upper_CI | ||
), | ||
alpha = 0.25, | ||
lwd = 0.25 | ||
) + | ||
ggplot2::theme_classic(base_size = 14) + | ||
ggplot2::scale_x_continuous( | ||
breaks = c(0, max(epreds_summary$time)), | ||
labels = c("LCA", "present") | ||
) + | ||
ggplot2::ylab("trait value (latent scale)") + | ||
ggplot2::xlab("time") + | ||
ggplot2::theme( | ||
legend.title = ggplot2::element_blank(), | ||
strip.background = ggplot2::element_blank(), | ||
strip.text = ggplot2::element_blank() | ||
) + | ||
ggplot2::ggtitle("Expected trait coevolution") | ||
} else if (combined_args$stochastic == TRUE) { | ||
# get predictions | ||
sims_long <- | ||
preds |> | ||
cubelyr::as.tbl_cube(met_name = "est") |> | ||
as.data.frame() |> | ||
dplyr::mutate( | ||
sim = factor( | ||
paste("sim", .data$samps), | ||
levels = paste("sim", sort(unique(.data$samps))) | ||
) | ||
) | ||
# plot | ||
p <- | ||
sims_long |> | ||
dplyr::filter(.data$samps <= 15) |> | ||
ggplot2::ggplot( | ||
ggplot2::aes( | ||
x = .data$time, | ||
y = .data$est, | ||
color = .data$response | ||
) | ||
) + | ||
ggplot2::facet_wrap( ~ .data$sim, ncol = 5) + | ||
ggplot2::geom_line(lwd = 1) + | ||
ggplot2::theme_minimal(base_size = 14) + | ||
ggplot2::scale_x_continuous( | ||
breaks = c(0, max(sims_long$time)), | ||
labels = c("LCA", "present") | ||
) + | ||
ggplot2::ylab("trait value (latent scale)") + | ||
ggplot2::xlab("time") + | ||
ggplot2::theme( | ||
legend.title = ggplot2::element_blank(), | ||
strip.background = ggplot2::element_blank(), | ||
panel.spacing.x = ggplot2::unit(1, "lines"), | ||
axis.text.x = ggplot2::element_blank(), | ||
axis.ticks.x = ggplot2::element_blank() | ||
) + | ||
ggplot2::ggtitle("Predicted trait coevolution") | ||
} | ||
return(p) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
#' Predict a co-evolutionary time series from a fitted \code{coevfit} object | ||
#' | ||
#' This function produces predicted values of traits over a time series, given | ||
#' the estimated parameters for a \code{coevfit} model. This function is used | ||
#' under the hood by the plotting function \code{\link{coev_pred_series}}. | ||
#' | ||
#' @param object An object of class \code{coevfit} | ||
#' @param eta_anc If \code{NULL} (default), the starting values for the latent | ||
#' states \eqn{\eta} will be set to the estimated ancestral states at the | ||
#' root from the \code{coevfit} model. Otherwise, a named numeric vector with | ||
#' length equal to the number of variables specifying the initial \eqn{\eta} | ||
#' values. All variable names must be included in this vector. | ||
#' @param tmax A positive number indicating the total duration of time for which | ||
#' to predict. Set to 1 by default, corresponding to the entire time depth of | ||
#' the original phylogenetic tree(s). | ||
#' @param ntimes A positive integer indicating the total number of discrete time | ||
#' steps over which to make predictions. Each step corresponds to a time | ||
#' difference of dt = tmax/ntimes. Set to 30 by default. | ||
#' @param ndraws An integer indicating the number of draws to return. The | ||
#' default and maximum number of draws is the size of the posterior sample. | ||
#' @param stochastic Logical (defaults to \code{FALSE}); indicator of whether | ||
#' predictions should include only the expected co-evolutionary change | ||
#' due to deterministic selection (FALSE) or also stochastic drift (TRUE). | ||
#' | ||
#' @return An \[ndraw, ntimes, nvariables\] array of predicted \eqn{\eta} | ||
#' values. | ||
#' | ||
#' @author Scott Claessens \email{scott.claessens@@gmail.com}, Erik Ringen | ||
#' \email{erikjacob.ringen@@uzh.ch} | ||
#' | ||
#' @seealso \code{\link{coev_plot_pred_series}} | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # fit dynamic coevolutionary model | ||
#' fit <- coev_fit( | ||
#' data = authority$data, | ||
#' variables = list( | ||
#' political_authority = "ordered_logistic", | ||
#' religious_authority = "ordered_logistic" | ||
#' ), | ||
#' id = "language", | ||
#' tree = authority$phylogeny, | ||
#' # additional arguments for cmdstanr::sample() | ||
#' chains = 4, | ||
#' parallel_chains = 4, | ||
#' seed = 1 | ||
#' ) | ||
#' | ||
#' # simulate trait co-evolution | ||
#' sims <- coev_pred_series( | ||
#' object = fit, | ||
#' stochastic = TRUE | ||
#' ) | ||
#' | ||
#' # expected trait co-evolution, no drift | ||
#' epreds <- coev_pred_series( | ||
#' object = fit, | ||
#' stochastic = FALSE | ||
#' ) | ||
#' } | ||
#' | ||
#' @export | ||
coev_pred_series <- function(object, eta_anc = NULL, tmax = 1, ntimes = 30, | ||
ndraws = NULL, stochastic = FALSE) { | ||
# stop if object is not of class coevfit | ||
if (!methods::is(object, "coevfit")) { | ||
stop2( | ||
paste0( | ||
"Argument 'object' must be a fitted coevolutionary model of class ", | ||
"coevfit." | ||
) | ||
) | ||
} | ||
if (!is.null(eta_anc)) { | ||
# stop if eta_anc is neither NULL nor | ||
# a named vector of length equal to the number of variables | ||
if (!is.numeric(eta_anc) | length(eta_anc) != length(object$variables)) { | ||
stop2( | ||
paste0( | ||
"Argument 'eta_anc' must be numeric and equal in length to the ", | ||
"number of variables." | ||
) | ||
) | ||
} | ||
# stop if eta_anc is not a named vector | ||
if (is.null(names(eta_anc))) { | ||
stop2("Argument 'eta_anc' is not a named vector.") | ||
} | ||
# stop if eta_anc does not have names equal to variables in the model | ||
if (!identical(sort(names(eta_anc)), sort(names(object$variables)))) { | ||
stop2( | ||
paste0( | ||
"Argument 'eta_anc' has names different to the variables included ", | ||
"in the model." | ||
) | ||
) | ||
} | ||
} | ||
# stop if tmax is not numeric or not positive | ||
if (!is.numeric(tmax) | length(tmax) != 1) { | ||
stop2("Argument 'tmax' must be a single numeric value.") | ||
} else if (tmax <= 0) { | ||
stop2("Argument 'tmax' must be positive.") | ||
} | ||
# stop if ndraws is not a single integer between 1 and the total num draws | ||
if (!is.null(ndraws)) { | ||
if (!is.integer(ndraws) | length(ndraws) != 1) { | ||
stop2("Argument 'ndraws' must be a single integer.") | ||
} else if (ndraws < 1 | ndraws > nrow(object$fit$draws())) { | ||
stop2( | ||
"Argument 'ndraws' must be between 1 and the total number of draws." | ||
) | ||
} | ||
} | ||
# stop if stochastic not logical | ||
if (!is.logical(stochastic)) { | ||
stop2("Argument 'stochastic' must be logical.") | ||
} | ||
# get posterior samples and number of variables | ||
post <- extract_samples(object) | ||
J <- length(object$variables) | ||
# get eta_anc in the correct order | ||
eta_anc <- eta_anc[names(object$variables)] | ||
# get number of samples | ||
nsamps <- ifelse(is.null(ndraws), length(post$lp__), ndraws) | ||
# initialise empty array for predictions | ||
preds <- | ||
array( | ||
NA, | ||
dim = c(nsamps, ntimes + 1, J), | ||
dimnames = list( | ||
samps = 1:nsamps, | ||
time = 1:(ntimes + 1), | ||
response = names(object$variables) | ||
) | ||
) | ||
# if user defined ancestral states | ||
# otherwise, use model inferred ancestral states | ||
if (!is.null(eta_anc)) { | ||
for (i in 1:nsamps) preds[i, 1, ] = eta_anc | ||
} else { | ||
# set posterior ancestral states | ||
eta_anc_long <- post$eta_anc | ||
ntrees <- dim(eta_anc_long)[2] | ||
# shuffle dimensions of tree dimension to get random draws | ||
eta_anc_long <- | ||
eta_anc_long[, sample(1:ntrees, size = ntrees, replace = FALSE), ] | ||
if (ntrees > 1) { | ||
# make into 2d matrix by stacking trees | ||
eta_anc_long2 <- eta_anc_long[, 1, ] | ||
for (t in 2:ntrees) { | ||
eta_anc_long2 <- rbind(eta_anc_long2, eta_anc_long[,t,]) | ||
eta_anc_long <- eta_anc_long2 | ||
} | ||
} | ||
for (i in 1:nsamps) { | ||
preds[i, 1, ] = eta_anc_long[i, ] | ||
} | ||
} | ||
# calculate predictions | ||
for (i in 1:nsamps) { | ||
# selection parameters | ||
A <- post$A[i,,] | ||
A_delta <- as.matrix(Matrix::expm(A * tmax / ntimes)) | ||
inv_A <- solve(A) | ||
I <- diag(rep(1, J)) | ||
b <- post$b[i,] | ||
# drift parameters | ||
Q_inf <- post$Q_inf[i,,] | ||
VCV <- Q_inf - ((A_delta) %*% Q_inf %*% t(A_delta)) | ||
chol_VCV <- t(chol(VCV)) | ||
# calculate predictions over time | ||
for (t in 1:ntimes) { | ||
preds[i, t + 1, ] <- (A_delta %*% preds[i, t, ] + | ||
(inv_A %*% (A_delta - I) %*% b))[,1] | ||
# add drift | ||
if (stochastic == TRUE) { | ||
preds[i, t + 1, ] <- | ||
preds[i, t + 1, ] + (chol_VCV %*% stats::rnorm(J, 0, 1)) | ||
} | ||
} | ||
} | ||
# shuffle draws to de-correlate when doing simulations | ||
preds[sample(1:nsamps, size = nsamps, replace = FALSE), , ] | ||
} |
Oops, something went wrong.