Skip to content

Commit

Permalink
#47 Estimated correlated drift by default
Browse files Browse the repository at this point in the history
  • Loading branch information
Scott Claessens authored and Scott Claessens committed Oct 1, 2024
1 parent 03898d3 commit edafd70
Show file tree
Hide file tree
Showing 42 changed files with 1,061 additions and 623 deletions.
33 changes: 23 additions & 10 deletions R/coev_fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@
#' will additionally control for spatial location by including a separate
#' Gaussian Process over locations for every coevolving variable in the model.
#' @param prior (optional) A named list of priors for the model. If not
#' specified, the model uses default priors (see Stan code). Alternatively,
#' the user can specify a named list of priors. The list must contain
#' non-duplicated entries for any of the following variables: the
#' specified, the model uses default priors (see \code{help(coev_fit)}).
#' Alternatively, the user can specify a named list of priors. The list must
#' contain non-duplicated entries for any of the following parameters: the
#' autoregressive effects (\code{A_diag}), the cross effects
#' (\code{A_offdiag}), the drift scale parameters (\code{Q_diag}), the
#' continuous time intercepts (\code{b}), the ancestral states for the traits
#' (\code{A_offdiag}), the Cholesky factor for the drift matrix (\code{L_R}),
#' the drift std. dev. parameters (\code{Q_sigma}), the continuous time
#' intercepts (\code{b}), the ancestral states for the traits
#' (\code{eta_anc}), the cutpoints for ordinal variables (\code{c}), the
#' overdispersion parameters for negative binomial variables (\code{phi}),
#' the degrees of freedom parameters for Student t variables (\code{nu}),
Expand All @@ -66,6 +67,10 @@
#' efficiency and ensure accurate inferences. If \code{FALSE}, variables are
#' left unstandardised for model fitting. In this case, users should take care
#' to set sensible priors on variables.
#' @param estimate_Q_offdiag Logical. If \code{TRUE} (default), the model
#' estimates the off-diagonals for the \eqn{Q} drift matrix (i.e., correlated
#' drift). If \code{FALSE}, the off-diagonals for the \eqn{Q} drift matrix
#' are set to zero.
#' @param prior_only Logical. If \code{FALSE} (default), the model is fitted to
#' the data and returns a posterior distribution. If \code{TRUE}, the model
#' samples from the prior only, ignoring the likelihood.
Expand Down Expand Up @@ -98,7 +103,9 @@
#'
#' - \code{A_diag} (autoregressive effects) = \code{std_normal()}
#' - \code{A_offdiag} (cross effects) = \code{std_normal()}
#' - \code{Q_diag} (drift scale parameters) = \code{std_normal()}
#' - \code{L_R} (Cholesky factor for drift matrix) =
#' \code{lkj_corr_cholesky(4)}
#' - \code{Q_sigma} (drift std. dev. parameters) = \code{std_normal()}
#' - \code{b} (continuous time intercepts) = \code{std_normal()}
#' - \code{eta_anc} (trait ancestral states) = \code{std_normal()}
#' - \code{c} (ordinal cutpoints) = \code{normal(0, 2)}
Expand Down Expand Up @@ -179,16 +186,20 @@
#' @export
coev_fit <- function(data, variables, id, tree,
effects_mat = NULL, dist_mat = NULL,
prior = NULL, scale = TRUE, prior_only = FALSE, ...) {
prior = NULL, scale = TRUE,
estimate_Q_offdiag = TRUE,
prior_only = FALSE, ...) {
# check arguments
run_checks(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only)
dist_mat, prior, scale, estimate_Q_offdiag, prior_only)
# write stan code for model
sc <- coev_make_stancode(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only)
dist_mat, prior, scale,
estimate_Q_offdiag, prior_only)
# get data list for stan
sd <- coev_make_standata(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only)
dist_mat, prior, scale,
estimate_Q_offdiag, prior_only)
# fit model
model <-
cmdstanr::cmdstan_model(
Expand All @@ -208,11 +219,13 @@ coev_fit <- function(data, variables, id, tree,
variables = variables,
id = id,
tree = tree,
tree_name = deparse(substitute(tree)),
stan_code = sc,
stan_data = sd,
effects_mat = sd$effects_mat,
dist_mat = sd$dist_mat,
scale = scale,
estimate_Q_offdiag = estimate_Q_offdiag,
prior_only = prior_only
)
class(out) <- "coevfit"
Expand Down
11 changes: 8 additions & 3 deletions R/coev_helpers.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# helper function for checking arguments
run_checks <- function(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only) {
dist_mat, prior, scale, estimate_Q_offdiag,
prior_only) {
# coerce data argument to data frame
data <- try(as.data.frame(data), silent = TRUE)
# stop if data not coercible to data frame
Expand Down Expand Up @@ -282,8 +283,8 @@ run_checks <- function(data, variables, id, tree, effects_mat,
paste0(
"Argument 'prior' list contains names that are not allowed. Please ",
"use only the following names: 'b', 'eta_anc', 'A_offdiag', ",
"'A_diag', 'Q_diag', 'c', 'phi', 'nu', 'sigma_dist', 'rho_dist', ",
"'sigma_group', and 'L_group'"
"'A_diag', 'L_R', 'Q_sigma', 'c', 'phi', 'nu', 'sigma_dist', ",
"'rho_dist', 'sigma_group', and 'L_group'"
)
)
}
Expand All @@ -296,6 +297,10 @@ run_checks <- function(data, variables, id, tree, effects_mat,
if (!is.logical(scale)) {
stop2("Argument 'scale' is not logical.")
}
# stop if estimate_Q_offdiag is not logical
if (!is.logical(estimate_Q_offdiag)) {
stop2("Argument 'estimate_Q_offdiag' is not logical.")
}
# stop if prior_only is not logical
if (!is.logical(prior_only)) {
stop2("Argument 'prior_only' is not logical.")
Expand Down
55 changes: 43 additions & 12 deletions R/coev_make_stancode.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
#' will additionally control for spatial location by including a separate
#' Gaussian Process over locations for every coevolving variable in the model.
#' @param prior (optional) A named list of priors for the model. If not
#' specified, the model uses default priors (see Stan code). Alternatively,
#' the user can specify a named list of priors. The list must contain
#' non-duplicated entries for any of the following variables: the
#' specified, the model uses default priors (see \code{help(coev_fit)}).
#' Alternatively, the user can specify a named list of priors. The list must
#' contain non-duplicated entries for any of the following parameters: the
#' autoregressive effects (\code{A_diag}), the cross effects
#' (\code{A_offdiag}), the drift scale parameters (\code{Q_diag}), the
#' continuous time intercepts (\code{b}), the ancestral states for the traits
#' (\code{A_offdiag}), the Cholesky factor for the drift matrix (\code{L_R}),
#' the drift std. dev. parameters (\code{Q_sigma}), the continuous time
#' intercepts (\code{b}), the ancestral states for the traits
#' (\code{eta_anc}), the cutpoints for ordinal variables (\code{c}), the
#' overdispersion parameters for negative binomial variables (\code{phi}),
#' the degrees of freedom parameters for Student t variables (\code{nu}),
Expand All @@ -58,6 +59,10 @@
#' efficiency and ensure accurate inferences. If \code{FALSE}, variables are
#' left unstandardised for model fitting. In this case, users should take care
#' to set sensible priors on variables.
#' @param estimate_Q_offdiag Logical. If \code{TRUE} (default), the model
#' estimates the off-diagonals for the \deqn{Q} drift matrix (i.e., correlated
#' drift). If \code{FALSE}, the off-diagonals for the \deqn{Q} drift matrix
#' are set to zero.
#' @param prior_only Logical. If \code{FALSE} (default), the model is fitted to
#' the data and returns a posterior distribution. If \code{TRUE}, the model
#' samples from the prior only, ignoring the likelihood.
Expand Down Expand Up @@ -100,10 +105,12 @@
#' @export
coev_make_stancode <- function(data, variables, id, tree,
effects_mat = NULL, dist_mat = NULL,
prior = NULL, scale = TRUE, prior_only = FALSE) {
prior = NULL, scale = TRUE,
estimate_Q_offdiag = TRUE,
prior_only = FALSE) {
# check arguments
run_checks(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only)
dist_mat, prior, scale, estimate_Q_offdiag, prior_only)
# coerce data argument to data frame
data <- as.data.frame(data)
# extract distributions and variable names from named list
Expand All @@ -116,7 +123,8 @@ coev_make_stancode <- function(data, variables, id, tree,
eta_anc = "std_normal()",
A_offdiag = "std_normal()",
A_diag = "std_normal()",
Q_diag = "std_normal()",
L_R = "lkj_corr_cholesky(4)",
Q_sigma = "std_normal()",
c = "normal(0, 2)",
nu = "gamma(2, 0.1)",
sigma_dist = "exponential(1)",
Expand Down Expand Up @@ -298,8 +306,18 @@ coev_make_stancode <- function(data, variables, id, tree,
sc_parameters <- paste0(
"parameters{\n",
" vector<upper=0>[J] A_diag; // autoregressive terms of A\n",
" vector[num_effects - J] A_offdiag; // cross-lagged terms of A\n",
" vector<lower=0>[J] Q_diag; // self-drift terms\n",
" vector[num_effects - J] A_offdiag; // cross-lagged terms of A\n"
)
# add cholesky factor for Q matrix if estimating Q off diagonals
if (estimate_Q_offdiag) {
sc_parameters <- paste0(
sc_parameters,
" cholesky_factor_corr[J] L_R; // lower-tri choleksy decomp corr mat, used to construct Q mat\n"
)
}
sc_parameters <- paste0(
sc_parameters,
" vector<lower=0>[J] Q_sigma; // std deviation parameters of the Q mat\n",
" vector[J] b; // SDE intercepts\n",
" array[N_tree] vector[J] eta_anc; // ancestral states\n",
" array[N_tree, N_seg - 1] vector[J] z_drift; // stochastic drift, unscaled and uncorrelated\n"
Expand Down Expand Up @@ -359,7 +377,11 @@ coev_make_stancode <- function(data, variables, id, tree,
"transformed parameters{\n",
" array[N_tree, N_seg] vector[J] eta;\n",
" matrix[J,J] A = diag_matrix(A_diag); // selection matrix\n",
" matrix[J,J] Q = diag_matrix(Q_diag); // drift matrix\n",
ifelse(
estimate_Q_offdiag,
" matrix[J,J] Q = diag_matrix(Q_sigma) * (L_R * L_R') * diag_matrix(Q_sigma); // drift matrix\n",
" matrix[J,J] Q = diag_matrix(Q_sigma^2); // drift matrix\n"
),
" matrix[J,J] Q_inf; // asymptotic covariance matrix\n",
" array[N_tree, N_seg] vector[J] drift_tips; // terminal drift parameters\n",
" array[N_tree, N_seg] vector[J] sigma_tips; // terminal drift parameters\n"
Expand Down Expand Up @@ -463,7 +485,8 @@ coev_make_stancode <- function(data, variables, id, tree,
" }\n",
" A_offdiag ~ ", priors$A_offdiag, ";\n",
" A_diag ~ ", priors$A_diag, ";\n",
" Q_diag ~ ", priors$Q_diag, ";\n"
ifelse(estimate_Q_offdiag, paste0(" L_R ~ ", priors$L_R, ";\n"), ""),
" Q_sigma ~ ", priors$Q_sigma, ";\n"
)
# add priors for any cut points
for (j in 1:length(distributions)) {
Expand Down Expand Up @@ -606,6 +629,14 @@ coev_make_stancode <- function(data, variables, id, tree,
" vector[N_obs*J] log_lik; // log-likelihood\n",
" array[N_tree,N_obs,J] real yrep; // predictive checks\n"
)
if (estimate_Q_offdiag) {
sc_generated_quantities <-
paste0(
sc_generated_quantities,
" matrix[J,J] cor_R; // correlated drift\n",
" cor_R = multiply_lower_tri_self_transpose(L_R);\n"
)
}
if (any(duplicated(data[,id]))) {
sc_generated_quantities <-
paste0(
Expand Down
21 changes: 14 additions & 7 deletions R/coev_make_standata.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@
#' will additionally control for spatial location by including a separate
#' Gaussian Process over locations for every coevolving variable in the model.
#' @param prior (optional) A named list of priors for the model. If not
#' specified, the model uses default priors (see Stan code). Alternatively,
#' the user can specify a named list of priors. The list must contain
#' non-duplicated entries for any of the following variables: the
#' specified, the model uses default priors (see \code{help(coev_fit)}).
#' Alternatively, the user can specify a named list of priors. The list must
#' contain non-duplicated entries for any of the following parameters: the
#' autoregressive effects (\code{A_diag}), the cross effects
#' (\code{A_offdiag}), the drift scale parameters (\code{Q_diag}), the
#' continuous time intercepts (\code{b}), the ancestral states for the traits
#' (\code{A_offdiag}), the Cholesky factor for the drift matrix (\code{L_R}),
#' the drift std. dev. parameters (\code{Q_sigma}), the continuous time
#' intercepts (\code{b}), the ancestral states for the traits
#' (\code{eta_anc}), the cutpoints for ordinal variables (\code{c}), the
#' overdispersion parameters for negative binomial variables (\code{phi}),
#' the degrees of freedom parameters for Student t variables (\code{nu}),
Expand All @@ -57,6 +58,10 @@
#' efficiency and ensure accurate inferences. If \code{FALSE}, variables are
#' left unstandardised for model fitting. In this case, users should take care
#' to set sensible priors on variables.
#' @param estimate_Q_offdiag Logical. If \code{TRUE} (default), the model
#' estimates the off-diagonals for the \deqn{Q} drift matrix (i.e., correlated
#' drift). If \code{FALSE}, the off-diagonals for the \deqn{Q} drift matrix
#' are set to zero.
#' @param prior_only Logical. If \code{FALSE} (default), the model is fitted to
#' the data and returns a posterior distribution. If \code{TRUE}, the model
#' samples from the prior only, ignoring the likelihood.
Expand Down Expand Up @@ -96,10 +101,12 @@
#' @export
coev_make_standata <- function(data, variables, id, tree,
effects_mat = NULL, dist_mat = NULL,
prior = NULL, scale = TRUE, prior_only = FALSE) {
prior = NULL, scale = TRUE,
estimate_Q_offdiag = TRUE,
prior_only = FALSE) {
# check arguments
run_checks(data, variables, id, tree, effects_mat,
dist_mat, prior, scale, prior_only)
dist_mat, prior, scale, estimate_Q_offdiag, prior_only)
# coerce data argument to data frame
data <- as.data.frame(data)
# warning if scale = FALSE
Expand Down
9 changes: 7 additions & 2 deletions R/coev_plot_flowfield.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' depending on their current states, with the direction and strength of
#' change depicting with the direction and size of arrows. If nullclines are
#' included, they represent the parameter combinations where each trait is
#' at equilibrium, depending on the state of the other trait If three or
#' at equilibrium, depending on the state of the other trait. If three or
#' more traits were included in the model, other traits are held at their
#' median values during these computations.
#'
Expand Down Expand Up @@ -74,7 +74,12 @@ coev_plot_flowfield <- function(object, var1, var2, nullclines = FALSE,
limits = c(-2.5, 2.5)) {
# stop if object is not of class coevfit
if (!methods::is(object, "coevfit")) {
stop2("Argument 'object' must be a fitted coevolutionary model of class coevfit.")
stop2(
paste0(
"Argument 'object' must be a fitted coevolutionary model of class ",
"coevfit."
)
)
}
if (!is.character(var1) | length(var1) != 1) {
# stop if var1 not character string of length one
Expand Down
6 changes: 3 additions & 3 deletions R/coev_plot_selection_gradient.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ coev_plot_selection_gradient <- function(object, var1, var2,
mads <- unlist(lapply(eta, stats::mad))
lowers <- meds + limits[1]*mads
uppers <- meds + limits[2]*mads
# get median parameter values for A, b, and Q_diag
# get median parameter values for A, b, and Q_sigma
A <- stats::median(draws$A)
b <- stats::median(draws$b)
Q_diag <- stats::median(draws$Q_diag)
Q_sigma <- stats::median(draws$Q_sigma)
# ornstein uhlenbeck sde function for response and predictor variable
OU_sde <- function(resp_value, pred_value, resp_id, pred_id) {
# sde intercept
Expand All @@ -146,7 +146,7 @@ coev_plot_selection_gradient <- function(object, var1, var2,
# scale by mad for response variable
out <- out / mads[resp_id]
# divide by sigma scaled by mad for response variable
sigma <- Q_diag[resp_id] / mads[resp_id]
sigma <- Q_sigma[resp_id] / mads[resp_id]
out <- out / sigma
return(out)
}
Expand Down
Loading

0 comments on commit edafd70

Please sign in to comment.