diff --git a/.Rbuildignore b/.Rbuildignore index fed641f..8ea6adf 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,10 +5,10 @@ ^\.Rproj\.user$ _targets.R _targets/ - ^_pkgdown\.yml$ ^docs$ ^pkgdown$ ^\.github$ ^doc$ ^Meta$ +^data-raw/analyst_data/S2$ diff --git a/DESCRIPTION b/DESCRIPTION index 714e0b7..553be93 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ManyEcoEvo Title: Meta-analyse data from 'Many-Analysts' style studies -Version: 2.3.0.9000 +Version: 2.3.0.9003 Authors@R: c( person("Elliot", "Gould", , "elliot.gould@unimelb.edu.au", role = c("aut", "cre"), comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")), @@ -20,7 +20,9 @@ URL: https://github.com/egouldo/ManyEcoEvo, https://egouldo.github.io/ManyEcoEvo/ BugReports: https://github.com/egouldo/ManyEcoEvo/issues Depends: - R (>= 2.10) + R (>= 2.10), + rmarkdown, + bookdown Imports: betapart, cli, @@ -41,7 +43,6 @@ Imports: tibble, tidyr, tidyselect, - tidyselect, withr Suggests: broom.mixed, diff --git a/NAMESPACE b/NAMESPACE index 5610202..a181a41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -111,9 +111,11 @@ import(ggbeeswarm) import(ggplot2) import(lme4) import(metafor) +import(purrr) import(recipes) import(rlang) import(see) +import(stringr) import(tidyr) importFrom(EnvStats,stat_n_text) importFrom(broom,tidy) @@ -122,8 +124,8 @@ importFrom(cli,cli_abort) importFrom(cli,cli_alert_info) importFrom(cli,cli_alert_warning) importFrom(cli,cli_h2) +importFrom(data.table,setnames) importFrom(dplyr,across) -importFrom(dplyr,case_when) importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,ends_with) @@ -149,6 +151,7 @@ importFrom(metaviz,viz_funnel) importFrom(parameters,parameters) importFrom(performance,performance) importFrom(pointblank,col_vals_not_null) +importFrom(pointblank,has_columns) importFrom(pointblank,stop_if_not) importFrom(pointblank,test_col_vals_gte) importFrom(purrr,keep) diff --git a/R/assign_transformation_type.R b/R/assign_transformation_type.R 
index 2a4d233..9d3300a 100644 --- a/R/assign_transformation_type.R +++ b/R/assign_transformation_type.R @@ -1,13 +1,16 @@ -#' Assign back-transformation type to be applied to analysis point-estimates +#' Assign back-transformation type to be applied to analysts' point-estimates #' #' @param response_transformation Character vector of length 1L containing the analysis response transformation #' @param link_fun Character vector of length 1L containing the analysis link function #' -#' @return A character vector of length 1L +#' @return A character vector of length 1L containing the back-transformation type to be applied to the analysts' point-estimates. It is either "identity", "double_transformation", or the value of `link_fun` or `response_transformation`, or `NA`, if an appropriate transformation type cannot be assigned. +#' @details +#' Based on the response transformation and link function, the function assigns the back-transformation type to be applied to the analysts' point-estimates. The function assigns the identity transformation if the effects were reported on the link-scale and the estimates are already back-transformed to the original response variable prior to modelling. When either of these cases is not true for a given analysis, the function returns the value of the `link_fun` or `response_transformation` argument. When an analysis has been reported on the link-scale and the analyst transformed the response variable prior to modelling, the function assigns the `"double-transformation"` value for that analysis. When the `response_transformation` and `link_fun` arguments are missing, the function assigns the `"identity"` value to the analysis, assuming that `NA` values are equivalent to the identity transformation.
#' @export -#' @importFrom dplyr case_when -#' @importFrom rlang is_na -#' @importFrom rlang na_chr +#' @import dplyr +#' @import rlang +#' @family back-transformation functions +#' @seealso [prepare_response_variables_yi()], [standardise_response()]. To be called prior to [clean_response_transformation()]. assign_transformation_type <- function(response_transformation = character(1L), link_fun = character(1L)) { # # Link-Fun: Set back.transformed to "identity" diff --git a/R/back_transformations.R b/R/back_transformations.R index 66809ca..1ef83c3 100644 --- a/R/back_transformations.R +++ b/R/back_transformations.R @@ -1,17 +1,21 @@ # --- Back-transformation Conversion Functions --- -# convertion functions -# we assume estimates are normally distributed -# TODO Question - all natural log (no log10) - -#' Back transform beta estimates for models with log-link -#' -#' @param beta Analyst beta estimate or yi estimate -#' @param se Standard error of analyst's beta estimate or yi estimate. +#' Back-transform effect-sizes to response scale. +#' @description +#' Transforms effect-sizes and their standard errors to the response scale. +#' +#' @details We assume analysts' estimates are normally distributed. Each function uses a normal distribution to simulate a distribution of effect-sizes and their standard errors. Next, this distribution is back-transformed to the desired response scale. The mean `m_est`, standard error `se_est`, and quantiles (`lower` and `upper`) of the back-transformed distribution are returned within a dataframe. +#' @param beta Analyst beta estimate +#' @param se Standard error of analyst's effect size estimate $\\beta$ +#' or out-of-sample prediction estimate $y\\_i$. #' @param sim numeric vector of length 1. number of simulations. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles -#' @export +#' @return data frame containing the mean estimate, its standard error, and quantiles.
#' @family back transformation +#' @name back +NULL +#> NULL + +#' @describeIn back Back transform beta estimates for models with log-link +#' @export log_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- exp(simulated) %>% # exponential = inverse of log @@ -29,14 +33,8 @@ log_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with logit-link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with logit-link #' @export -#' @family back transformation logit_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- plogis(simulated) %>% # invlogit @@ -54,14 +52,8 @@ logit_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with probit-link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with probit-link #' @export -#' @family back transformation probit_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- pnorm(simulated) %>% # inv-probit @@ -79,14 +71,8 @@ probit_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with $1/x$ link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. 
-#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with $1/x$ link #' @export -#' @family back transformation inverse_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- 1 / simulated %>% # inverse @@ -104,14 +90,8 @@ inverse_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with $x^2$-link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with $x^2$-link #' @export -#' @family back transformation square_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- sqrt(simulated) %>% # inverse of x^2 @@ -129,14 +109,8 @@ square_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with $x^3$-link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with $x^3$-link #' @export -#' @family back transformation cube_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- pracma::nthroot(simulated, n = 3) %>% # inverse of x^3, use non-base to allow for -ve numbers @@ -154,14 +128,8 @@ cube_back <- function(beta, se, sim) { return(set) } -#' Back transform beta estimates for models with identity-link -#' @param beta Analyst beta estimate -#' @param se Standard error of analyst's beta estimate. -#' @param sim numeric vector of length 1. number of simulations. 
-#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with identity-link #' @export -#' @family back transformation identity_back <- function(beta, se, sim) { # identity (typo) TODO simulated <- rnorm(sim, beta, se) original <- simulated %>% # no transformation @@ -180,15 +148,8 @@ identity_back <- function(beta, se, sim) { # identity (typo) TODO } -#' Back transform beta estimates for models with power-link -#' @param beta Analyst beta estimate. Numeric vector of length 1. -#' @param se Standard error of analyst's beta estimate. Numeric vector of length 1. -#' @param sim Number of simulations. Numeric vector of length 1. -#' @param n Numeric vector of length 1 describing power which values were raised to in transformation. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates for models with power-link #' @export -#' @family back transformation power_back <- function(beta, se, sim, n) { simulated <- rnorm(sim, beta, se) original <- pracma::nthroot(simulated, n = n) %>% # inverse of x^n, use non-base to allow for -ve numbers @@ -206,15 +167,9 @@ power_back <- function(beta, se, sim, n) { return(set) } -#' Back transform beta estimates or out-of-sample predictions from models whose response variable has been divided by some number -#' @param beta Analyst beta estimate. Numeric vector of length 1. -#' @param se Standard error of analyst's beta estimate. Numeric vector of length 1. -#' @param sim Number of simulations. Numeric vector of length 1. -#' @param n Numeric vector of length 1 describing the value of the divisor. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates or out-of-sample predictions from models whose response variable has been divided by some number, `n`. 
+#' @param n Denominator used by analyst to divide the response variable. #' @export -#' @family back transformation divide_back <- function(beta, se, sim, n) { simulated <- rnorm(sim, beta, se) original <- simulated * n %>% @@ -243,14 +198,8 @@ divide_back <- function(beta, se, sim, n) { return(set) } -#' Back transform beta estimates or out-of-sample predictions from models whose response variable has been transformed by the square root -#' @param beta Analyst beta estimate. Numeric vector of length 1. -#' @param se Standard error of analyst's beta estimate. Numeric vector of length 1. -#' @param sim Number of simulations. Numeric vector of length 1. -#' -#' @return data frame containing the mean estimate, its standard error, and quantiles +#' @describeIn back Back transform beta estimates or out-of-sample predictions from models whose response variable has been transformed by the square root #' @export -#' @family back transformation square_root_back <- function(beta, se, sim) { simulated <- rnorm(sim, beta, se) original <- simulated^2 %>% diff --git a/R/clean_response_transformation.R b/R/clean_response_transformation.R index b7f24aa..4176e38 100644 --- a/R/clean_response_transformation.R +++ b/R/clean_response_transformation.R @@ -13,12 +13,17 @@ #' used by the analyst. The `cleaned_transformation` values are the cleaned response transformation values that are equal to the required `transformation` values in [conversion()]. #' The user can supply an alternate table of transformations depending on what is required for the back-transformation functions. 
#' @family back-transformation functions +#' @seealso To be called after [assign_transformation_type()] +#' @examples +#' clean_response_transformation("power2", ManyEcoEvo:::transformation_tbl) +#' clean_response_transformation("log", ManyEcoEvo:::transformation_tbl) +#' clean_response_transformation("new_transformation", ManyEcoEvo:::transformation_tbl ) # Returns NA if not found clean_response_transformation <- function(response_transformation, transformation_tbl = ManyEcoEvo:::transformation_tbl) { original_data <- tibble(transformation_orig = response_transformation) out <- original_data %>% - left_join(transformation_tbl) %>% + left_join(transformation_tbl, by = join_by(transformation_orig)) %>% select(cleaned_transformation) %>% # TODO WHAT ABOUT MISSING NON-STANDARD TRANSFORMATIONS?? flatten_chr() diff --git a/R/compute_MA_inputs.R b/R/compute_MA_inputs.R index 4d5fec0..39dc8eb 100644 --- a/R/compute_MA_inputs.R +++ b/R/compute_MA_inputs.R @@ -2,7 +2,7 @@ #' @description Computes the sorensen diversity indices and joins it to the prepared data in preparation for meta-analysing all subsets of data with `meta_analyse_datasets()`. #' #' @param ManyEcoEvo A dataframe grouped by the character columns `dataset`, `estimate_type`, `exclusion_set`. Each group corresponds to a subset of the full `dataset`, and has the subset analyst data stored in `data`, with its corresponding subset `diversity_data`. -#' @param estimate_type +#' @param estimate_type character string, one of "Zr", "yi", "y25", "y50", "y75". #' @details The name of the subset is derived from the funs generated by `subset_fns_yi` and/or `subset_fns_Zr`. #' #' Computes sorensen diversity indices `diversity_indices` for each subset of data returning them in the list-columns `diversity_indices` and joins them to the subset analyst `data`, returning it in the list-column `effects_analysis`.
diff --git a/R/compute_metaanalysis_inputs.R b/R/compute_metaanalysis_inputs.R index 73e6773..e3c7e19 100644 --- a/R/compute_metaanalysis_inputs.R +++ b/R/compute_metaanalysis_inputs.R @@ -8,7 +8,7 @@ compute_metaanalysis_inputs <- function(.data, estimate_type = character(1L)) { # TODO insert checks that appropriate columns exist match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE) - cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}")) + cli::cli_h1(glue::glue("Computing meta-analysis inputs", " for estimate type ", "{estimate_type}")) if (estimate_type == "Zr") { # Convert Effect Sizes to Zr ------- @@ -40,13 +40,13 @@ compute_metaanalysis_inputs <- function(.data, estimate_type = character(1L)) { mutate( back_transformed_data = pmap( - .l = list( + .l = list( #TODO bug, missing argument augmented_data, link_function, response_transformation_description ), .f = ~ if (!rlang::is_na(..1) | !rlang::is_na(..2)) { - convert_predictions( + convert_predictions( #TODO bug, missing argument augmented_data = ..1, link_fun = ..2, response_transformation = ..3 diff --git a/R/conversion.R b/R/conversion.R index 7e0e542..8bd7508 100644 --- a/R/conversion.R +++ b/R/conversion.R @@ -1,5 +1,5 @@ -#' Conditionally apply back-transformation -#' @description Conditionally apply back-transformation functions depending on the value of `transformation` +#' Apply back-transformation to beta estimates +#' @description Conditionally apply back-transformation functions depending on the value of `transformation`. #' #' @param beta Beta estimate, numeric vector of length 1. 
#' @param se Standard error of the `beta` estimate, numeric vector of length 1 @@ -21,6 +21,11 @@ #' * "powerX", where `X` is a numeric #' * "divided.by.X", where `X` is a numeric #' @export +#' @import dplyr +#' @import purrr +#' @import cli +#' @import rlang +#' @import stringr conversion <- function(beta, se, transformation, sim = 10000) { # Ensure Correct Number of Arguments Supplied na_args <- purrr::discard(c(beta, se, transformation), is.na) %>% diff --git a/R/convert_predictions.R b/R/convert_predictions.R index e7facad..ea44002 100644 --- a/R/convert_predictions.R +++ b/R/convert_predictions.R @@ -4,6 +4,12 @@ #' @return A tibble of out of sample predictions on the response variable scale of the response variable used by the analyst #' @family analysis-values #' @export +#' @import dplyr +#' @import purrr +#' @import rlang +#' @import cli +#' @importFrom data.table setnames +#' @importFrom pointblank has_columns convert_predictions <- function(augmented_data, transformation_type, response_transformation, @@ -93,7 +99,6 @@ convert_predictions <- function(augmented_data, out <- rlang::na_cpl } - vconvert_double_transformation( beta = beta_vals, se = augmented_data$se.fit, diff --git a/R/exclude_extreme_VZ.R b/R/exclude_extreme_VZ.R index 921dfa6..ebfd943 100644 --- a/R/exclude_extreme_VZ.R +++ b/R/exclude_extreme_VZ.R @@ -1,6 +1,6 @@ #' Exclude extreme values of VZ from a dataframe of standardised predictions #' -#' @param df +#' @param df A dataframe containing the columns `Z` and `VZ` #' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of `df`. #' #' @return A dataframe with observations removed where the value is less than that of `VZ\_cutoff`. 
diff --git a/R/get_diversity_data.R b/R/get_diversity_data.R index b07e27b..47deb32 100644 --- a/R/get_diversity_data.R +++ b/R/get_diversity_data.R @@ -1,6 +1,6 @@ #' Get Diversity Data #' -#' @param raw_data +#' @param raw_data A tibble of raw data #' @param dataset character string of either "eucalyptus" or "blue tit" #' @param variables character vector of any length containing names of variables to get diversity data for #' diff --git a/R/prepare_response_variables_yi.R b/R/prepare_response_variables_yi.R index 9d38826..7c33444 100644 --- a/R/prepare_response_variables_yi.R +++ b/R/prepare_response_variables_yi.R @@ -41,7 +41,7 @@ prepare_response_variables_yi <- function(ManyEcoEvo, #' Back Transform Response Variables - yi #' -#' @param data +#' @param dat A dataframe of analyst-submitted out-of-sample prediction data #' @param estimate_type The type of estimate to be standardised. Character vector of length 1, whose value may be "yi", "y25", "y50", "y75". #' @param param_table A table of estimated 'population' parameters for each variable in the analysis datasets. #' @param dataset One of either "blue tit" or "eucalyptus" diff --git a/R/standardise_response.R b/R/standardise_response.R index bc26790..adcf09c 100644 --- a/R/standardise_response.R +++ b/R/standardise_response.R @@ -1,16 +1,25 @@ #' Standardise Response Variable #' -#' @param data +#' @param dat A tibble of analyst data. For out-of-sample predictions, the predictions are stored in a list-column called `augmented_data`. #' @param estimate_type The type of estimate to be standardised. Character vector of length 1, whose value may be "Zr", "yi", "y25", "y50", "y75". #' @param param_table A table of estimated 'population' parameters for each variable in the analysis datasets.
#' @param dataset One of either "blue tit" or "eucalyptus" #' #' @return A tibble of analyst data with standardised values contained in a list-column called 'back_transformed_data' #' @details +#' +#' When the `estimate_type` is `"Zr"`, [standardise_response()] standardises effect-sizes with [est_to_zr()], assuming that the `beta_estimate` and `beta_SE` values have already been back-transformed to the appropriate scale. #TODO check this. +#' +#' When the `estimate_type` is `"yi"` or otherwise, the function: +#' 1. Assigns a `transformation_type` with [assign_transformation_type()]. +#' 2. Converts the out-of-sample predictions on the link- or transformed-response scale back to the original response scale using [convert_predictions()]. +#' 3. Standardises predictions on the original response-scale to the Z-scale, with [pred_to_Z()]. +#' #' Note that for $y_i$ or out of sample predictions that are standardised, if param_table is `NA` or `NULL` for a given variable, then the response variable will not be standardised, and NA will be returned for that entry in `back_transformed_data`.
#' #' @export #' @family analyst-data +#' @seealso [est_to_zr()], [assign_transformation_type()] standardise_response <- function(dat, estimate_type = character(1L), param_table = NULL, @@ -20,7 +29,7 @@ standardise_response <- function(dat, match.arg(estimate_type, choices = c("Zr", "yi", "y25", "y50", "y75"), several.ok = FALSE) match.arg(dataset, choices = c("eucalyptus", "blue tit"), several.ok = FALSE) cli::cli_h1(glue::glue("Computing meta-analysis inputs", "for estimate type ", "{estimate_type}")) - + if (estimate_type == "Zr") { # Convert Effect Sizes to Zr ------- cli::cli_h2(paste0("Computing standardised effect sizes ", "{.code Zr}", " and variance ", "{.code VZr}")) @@ -56,20 +65,23 @@ standardise_response <- function(dat, analysis_id, split_id ) %>% - dplyr::mutate(params = purrr::map( - .x = response_variable_name, - .y = param_table, - .f = ~ dplyr::filter(.y, variable == .x) - )) %>% - dplyr::mutate(nrow_params = purrr::map_int(params, nrow)) %>% - dplyr::mutate(params = purrr::map2(params, - nrow_params, - .f = ~ if (.y > 0) { - .x - } else { - NA - } - )) %>% + dplyr::mutate(params = + purrr::map( + .x = response_variable_name, + .y = param_table, + .f = ~ dplyr::filter(.y, variable == .x) + )) %>% + dplyr::mutate(nrow_params = + purrr::map_int(params, nrow)) %>% + dplyr::mutate(params = + purrr::map2(params, + nrow_params, + .f = ~ if (.y > 0) { + .x + } else { + NA + } + )) %>% dplyr::select(-nrow_params) %>% dplyr::mutate( transformation_type = @@ -99,9 +111,9 @@ standardise_response <- function(dat, } ) ) - + cli::cli_h2(paste0("Standardising out-of-sample predictions")) - + dat <- dat %>% dplyr::mutate( back_transformed_data = # TODO rename standardised_data and fix up downstream dependencies @@ -119,7 +131,7 @@ standardise_response <- function(dat, ) ) } - + # TODO for any analyses implicitly excluded, return a message to the user return(dat) } diff --git a/R/utils.R b/R/utils.R index 890ac3a..c33e3c1 100644 --- a/R/utils.R +++ b/R/utils.R @@
-1,8 +1,9 @@ #' Negative Value Matching #' -#' See \code{base::[](`%in%`)} for details. %nin% is a binary operator, returning a logical vector indicating if there is a negative match or not. +#' See [`%in%`][base::match] for details. `%nin%` is a binary operator, +#' returning a logical vector indicating if there is a negative match or not. #' -#' @name %in% +#' @name %nin% #' @rdname NotIn #' @keywords internal #' @export @@ -10,13 +11,14 @@ #' @param lhs vector or NULL: the values to be matched. [Long vectors](http://127.0.0.1:59782/help/library/base/help/Long%20vectors) are supported. #' @param rhs vector or NULL: the values to be matched. [Long vectors](http://127.0.0.1:59782/help/library/base/help/Long%20vectors) are supported. #' @return A logical vector indicating which value of `lhs` are *not* matched in `rhs` +#' @examples +#' "A" %nin% LETTERS[1:10] +#' "A" %in% LETTERS[1:10] `%nin%` <- Negate(`%in%`) - - -#' Subsetting Functions for Zr analysis +#' Subsetting Functions for effect-size meta-analysis #' -#' @description Generates a list of functions that are used to subset the processed ManyEcoEvo dataset +#' @description Generates a list of functions that are used to subset the processed [ManyEcoEvo] dataset #' #' @return A named list of `lambda` functions #' @export @@ -38,12 +40,20 @@ subset_fns_Zr <- function() { # TODO update calling of this fn (switch to fn rat return(out) } -#' Subsetting Functions for yi analysis +#' Subsetting Functions for out-of-sample predictions meta-analysis #' -#' @description Generates a list of functions that are used to subset the processed ManyEcoEvo dataset containing out-of-sample predictions \(\code{y\_i}\) +#' @description Generates a list of functions that are used to subset the +#' processed ManyEcoEvo dataset (e.g. `data(ManyEcoEvo_results)`) containing +#' out-of-sample predictions $y_i$.
+#' +#' @details +#' The subset functions are used to filter out predictions that are not +#' to be included in the meta-analysis. #' #' @return A named list of `lambda` functions #' @export +#' @import rlang +#' @import dplyr subset_fns_yi <- function() { out <- list( # TODO: which dataset and variable are the prediction exclusions contained?? subset_complete = rlang::as_function(~ .x %>% @@ -77,16 +87,20 @@ capwords <- function(s, strict = FALSE) { sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) } - - #' Removes infinite and NA values from a dataframe of standardised effects #' -#' @param effects_analysis -#' @param Z_colname unquoted or bare column name with the Z or Zr estimates -#' @param VZ_colname unquoted or bare column name containing the VZ or VZr estimates +#' @param effects_analysis A dataframe containing the standardised effects +#' @param Z_colname unquoted or bare column name with the $Z$ or $Z_r$ estimates +#' @param VZ_colname unquoted or bare column name containing the $VZ$ or $\\text{VZ}_r$ estimates #' #' @return a dataframe without #' @export +#' @import dplyr +#' @examples +#' data(ManyEcoEvo_results) +#' ManyEcoEvo_results %>% +#' pluck("effects_analysis", 1) %>% +#' rm_inf_na(beta_estimate, beta_SE) rm_inf_na <- function(effects_analysis, Z_colname, VZ_colname) { effects_analysis %>% filter( @@ -106,10 +120,14 @@ rm_inf_na <- function(effects_analysis, Z_colname, VZ_colname) { #' #' @return A named list of tibbles. Each tibble contains the rows of .tbl for the associated group and all the columns, including the grouping variables. Note that this returns a list_of which is slightly stricter than a simple list but is useful for representing lists where every element has the same type.
#' @export -#' @importFrom dplyr group_split +#' @import dplyr +#' @import rlang +#' @import purrr #' @examples -#' named_group_split(ManyEcoEvo::euc_data, Property) -#' named_group_split(ManyEcoEvo::blue_tit_data, hatch_Area) +#' data(euc_data) +#' data(blue_tit_data) +#' named_group_split(euc_data, Property) +#' named_group_split(blue_tit_data, hatch_Area) named_group_split <- function(.data, grouping_variable) { .data %>% group_by({{ grouping_variable }}) %>% diff --git a/README.qmd b/README.qmd index 36e4f17..447840f 100644 --- a/README.qmd +++ b/README.qmd @@ -45,7 +45,7 @@ suppressPackageStartupMessages(library(targets)) targets::tar_read("ManyEcoEvo") ``` -The script that generates the ManyEcoEvo package datasets is located in [`ManyEcoEvo/data-raw/tar_make.R`](https://github.com/egouldo/ManyEcoEvo/blob/main/data-raw/tar_make.R). +The script that generates datasets used in the `ManyEcoEvo::` package is located in [`ManyEcoEvo/data-raw/tar_make.R`](https://github.com/egouldo/ManyEcoEvo/blob/main/data-raw/tar_make.R). 
## License diff --git a/_pkgdown.yml b/_pkgdown.yml index 30c2667..76cf4aa 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -5,14 +5,15 @@ template: bootstrap: 5 reference: - title: Unprocessed data cleaning and checking - desc: Functions for cleaning and QA checking unprocessed analyst data + desc: | + Functions for cleaning and QA checking unprocessed analyst data - subtitle: Miscellaneous cleaning functions contents: - rm_inf_na - anonymise_teams - clean_response_transformation - assign_transformation_type -- subtitle: Working with analyst-provided out-of-sample predictions +- subtitle: Tidying analyst out-of-sample predictions desc: ~ contents: - augment_prediction_data @@ -21,27 +22,37 @@ reference: - read_submission_data - starts_with("validate_predictions") - title: Data Processing for Meta-analysis and Modelling - desc: Calculating and standardising variables for analysis + desc: | + Calculating and standardising variables for meta-analysis and modelling +- subtitle: Back-transforming / standardising analyst estimates contents: + - ends_with("_back") + - starts_with("back") + - conversion + - conversion_2 + - convert_predictions - est_to_zr - Z_VZ_preds - pred_to_Z + - standardise_response +- subtitle: Excluding Data from Meta-analysis + contents: - apply_VZ_exclusions + - starts_with("exclude_") + - starts_with("subset_fns") +- subtitle: Transforming and standardising meta-analysis variables + desc: | + Functions for computing and standardising response and predictor + variables for meta-analysis. 
+ contents: - box_cox_transform - log_transform - calculate_deviation_score - calculate_sorensen_diversity_index - apply_sorensen_calc - - conversion - - conversion_2 - - convert_predictions - - ends_with("_back") - - back_transform_response_vars_yi - - starts_with("exclude_") - - starts_with("subset_fns") - - standardise_response - title: Process and create datasets for analysis - desc: Functions for creating datasets ready for meta-analysis or other analyses + desc: | + Functions for creating datasets ready for meta-analysis and modelling contents: - make_param_table - compute_MA_inputs @@ -49,17 +60,24 @@ reference: - get_diversity_data - starts_with("prepare_") - title: Model Fitting & Meta-analysis - desc: Functions for fitting meta-analysis and other models described ion *Gould + desc: | + Functions for fitting meta-analysis and other models described in *Gould et al. (2023)* contents: starts_with("fit_") - title: Extracting Analysis Outputs & Visualisation - desc: Functions for extracting model outputs, and visualising analysis results + desc: | + Functions for extracting model outputs, and visualising analysis results +- subtitle: Extracting meta-analysis outputs contents: - - gg_forest - - starts_with("plot_") - i2_ml - calc_I2_ml - apportion_heterogeneity_ml +- subtitle: Plotting + contents: + - gg_forest + - starts_with("plot_") +- subtitle: Model Checking and Comparison + contents: - compare_ml_MA - get_MA_fit_stats - run_model_checks @@ -94,4 +112,4 @@ reference: contents: - capwords - named_group_split - + - "%nin%" diff --git a/renv.lock b/renv.lock index b4ea63d..f41316d 100644 --- a/renv.lock +++ b/renv.lock @@ -460,6 +460,23 @@ ], "Hash": "40415719b5a479b87949f3aa0aee737c" }, + "bookdown": { + "Package": "bookdown", + "Version": "0.40", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "htmltools", + "jquerylib", + "knitr", + "rmarkdown", + "tinytex", + "xfun", + "yaml" + ], + "Hash": "896a79478a50c78fb035a37148638f4e" 
+ }, "boot": { "Package": "boot", "Version": "1.3-30", diff --git a/vignettes/analysis.Rmd b/vignettes/analysis.Rmd index 3ce37c0..17daf68 100644 --- a/vignettes/analysis.Rmd +++ b/vignettes/analysis.Rmd @@ -1,6 +1,12 @@ --- title: "Analysing Many-Analyst Data" -output: rmarkdown::html_vignette +output: + bookdown::html_document2: + base_format: rmarkdown::html_vignette + fig_caption: yes + toc: true +pkgdown: + as_is: true vignette: > %\VignetteIndexEntry{analysis} %\VignetteEngine{knitr::rmarkdown} diff --git a/vignettes/data_cleaning_preparation.Rmd b/vignettes/data_cleaning_preparation.Rmd index 64ad18a..e06075d 100644 --- a/vignettes/data_cleaning_preparation.Rmd +++ b/vignettes/data_cleaning_preparation.Rmd @@ -1,6 +1,12 @@ --- title: "Data Cleaning & Preparation for Analysis" -output: rmarkdown::html_vignette +output: + bookdown::html_document2: + base_format: rmarkdown::html_vignette + fig_caption: yes + toc: true +pkgdown: + as_is: true vignette: > %\VignetteIndexEntry{data_cleaning_preparation} %\VignetteEngine{knitr::rmarkdown} @@ -16,58 +22,140 @@ knitr::opts_chunk$set( ```{r setup} library(ManyEcoEvo) +suppressPackageStartupMessages(library(tidyverse)) ``` ## Data Cleaning -Analysts may report estimates on various scales, for example they may report values on the link or response scales, they may also .... -In order to proceed with effect size standardisation or other standardisation, some transformations or back-transformations need to be applied. -- Anonymising submissions - `anonymise_teams()` / `anonymise_id_col()` -- back-transformation: - - We asked for estimates to be provided on the `______` scale. - - `clean_response_transformation()` - - `assign_back_transformation_type()` - - Depends on what scale the values are reported on (link / response), what transformation has been applied to the response variable, and to the predictor variable. 
+### Anonymising Data -## Data Preprocessing +We have anonymised our public dataset `data(ManyEcoEvo)` using `anonymise_teams()`, which takes a lookup table of new and old identifier names with which to replace each analysis identifier. The lookup table and original non-anonymised data can be stored in a private repository or component, on the OSF for example, while the anonymised dataset can be released publicly. -what is the distinction between processing and cleaning? +## Data Pre-processing for Meta-analysis -### Back-transforming response variables +The meta-analysis requires that all estimates are on the same scale. This is because the meta-analysis is based on the assumption that the effect sizes are comparable. -in meta-analysis units need to be standardized because analysts use different response / outcome variables, and we may need to ensure that we are talking about units on the correct scale before we can standardize. +We provide the function `standardise_response()` to standardise a data-frame of analyst-data. -| function name | description | -| ----------------- | --------- | -| `log_back()` | | -| `logit_back()` | | -| `probit_back()` | | -| `inverse_back()` | | -| `square_back()` | | -| `cube_back()` | | -| `identity_back()` | | -| `power_back()` | | -| `divide_back()` | | -| `square_root_back()` | | +```{r demo_standardise_response_Zr} +data("ManyEcoEvo") +blue_tit_effect_sizes <- + ManyEcoEvo %>% + dplyr::filter(dataset == "blue tit") %>% + pluck("data", 1) %>% + slice(1:10) %>% + select(contains("id"), + -response_id_S2, + contains("beta"), + adjusted_df) -### Standardising effect sizes -- Standardisation of effect-sizes (fishers' Z), however other transformations could be applied using other packages if need be (Gurrindgi green meta-analsis handbook). 
- - Predictions - - `pred_to_Z()` (data frame level), `Z_VZ_preds()` - - Coefficients - - `est_to_Zr()` +blue_tit_effect_sizes -### Calculating Sorensen similarity index +standardise_response(dat = blue_tit_effect_sizes, + estimate_type = "Zr", + param_table = NULL, + dataset = "blue tit") %>% + select(id_col, contains("beta"), adjusted_df, Zr, VZr ) +``` -- `apply_sorensen_calc()` -- `calculate_sorensen_diversity_index()` (also needs to be renamed) +Note that if any of `beta_estimate`, `beta_SE` or `adjusted_df` are missing, `standardise_response()` is unable to compute standardised correlation coefficients $Z_r$ and the associated variance $\text{VZ}_r$. + +Below we standardise a data frame containing out-of-sample point-estimate predictions, which are stored in a list-column of data frames called `augmented_data`. Notice the additional console messages about back-transformations, as well as an additional step, *Transforming out of sample predictions from link to response scale*. That's because, depending on what `estimate_type` is being standardised, a different workflow will be implemented by `standardise_response()`. + +```{r demo_standardise_response_yi} +data("ManyEcoEvo_yi") + +blue_tit_predictions <- + ManyEcoEvo_yi %>% + dplyr::filter(dataset == "blue tit") %>% + pluck("data", 1) %>% + slice(1:5) + +blue_tit_standardised <- + standardise_response( + dat = blue_tit_predictions, + estimate_type = "yi" , + param_table = ManyEcoEvo:::analysis_data_param_tables, + dataset = "blue tit" + ) %>% + ungroup %>% + select( + id_col, + params, + contains("transformation"), + augmented_data, + back_transformed_data + ) + +blue_tit_standardised -### Box-cox transforming deviation from meta-analytic mean +``` + +### Standardising effect-sizes to $Z_r$ {#sec-standardisation} + +- Standardisation of effect-sizes (Fisher's Z); however, other transformations could be applied using other packages if need be (Gurrindgi green meta-analysis handbook). 
+ +- Coefficients + - `est_to_Zr()` +### Standardising out-of-sample predictions to $Z_{y_i}$ +Before standardising out-of-sample predictions, we need to ensure that all estimates are on the same scale. Some analysts may report estimates on the link scale, while others may report estimates on the response scale, for instance. `ManyEcoEvo::` provides a suite of functions for back-transforming estimates prior to standardising effect sizes. + +#### Cleaning response-transformation values and assigning a back-transformation + +Analysts may report estimates on various scales: for example, they may report values on the link or response scale, or they may have transformed the response variable prior to model-fitting and reported effect-sizes on the transformed scale, rather than on the scale of the original variable. + +In order to proceed with standardisation of effect-sizes or out-of-sample estimates, we back-transform analysts' reported estimates to the original response scale in the datasets `euc_data` and `blue_tit_data`, rather than the link- or transformed- scale. + +1. `assign_transformation_type()` takes information about the `response_transformation` and the `link_fun` for a given analysis, and assigns the analysis to an appropriate back-transformation rule to be applied, one of `"identity"`, the value of the link-function or response-transformation, `"double.transformation"`, or `NA` if an appropriate transformation type cannot be assigned. +2. Next, the type of response transformation is cleaned using `clean_response_transformation()`, which cleans any value returned by `assign_transformation_type()` in step 1 that is not in `c("identity", "double.transformation", NA)` to a value in a lookup-tibble that assigns the appropriate transformation to apply. Users can supply their own lookup table, or else use or modify the version supplied in `ManyEcoEvo:::transformation_tbl`. +3. 
The estimates are now ready for back-transformation (section \@ref(sec-back-transformation)) and/or standardisation (section \@ref(sec-standardisation)). + +```{r} +#TODO demonstrate assign transformation and clean response transformation +``` + +#### Back-transforming analysts' reported out-of-sample predictions {#sec-back-transformation} + +| Function Name | Description | +|---------------------|---------------------------------------------------| +| `log_back()` | Back-transform beta estimates for models with log-link | +| `logit_back()` | Back-transform beta estimates for models with logit-link | +| `probit_back()` | Back-transform beta estimates for models with probit-link | +| `inverse_back()` | Back-transform beta estimates for models with $1/x$ link | +| `square_back()` | Back-transform beta estimates for models with $x^2$-link | +| `cube_back()` | Back-transform beta estimates for models with $x^3$-link | +| `identity_back()` | Back-transform beta estimates for models with identity-link | +| `power_back()` | Back-transform beta estimates for models with power-link | +| `divide_back()` | Back-transform beta estimates or out-of-sample predictions from models whose response variable has been divided by some number, `n` | +| `square_root_back()` | Back-transform beta estimates or out-of-sample predictions from models whose response variable has been transformed by the square root | + +We provide the `conversion()` function, which applies the relevant `back()` function depending on the required transformation assigned to that analysis: + +```{r} +#TODO demonstrate conversion() with back functions +``` + +#### Standardising out-of-sample predictions + +- `pred_to_Z()` (data frame level), `Z_VZ_preds()` + +```{r} +#TODO demonstrate application of V_Zr_preds() and or pred_to_z() +``` + +### Calculating Sorensen similarity index + +- `apply_sorensen_calc()` +- `calculate_sorensen_diversity_index()` (also needs to be renamed) + +### Box-cox transforming deviation 
from meta-analytic mean +### Excluding Data +- exclude_extreme_VZ() - exclude extreme values of VZ diff --git a/vignettes/multiple_datasets.Rmd b/vignettes/multiple_datasets.Rmd index 1844268..4183f0f 100644 --- a/vignettes/multiple_datasets.Rmd +++ b/vignettes/multiple_datasets.Rmd @@ -1,6 +1,12 @@ --- title: "Scaling Up: working with multiple subsets or multiple datasets" -output: rmarkdown::html_vignette +output: + bookdown::html_document2: + base_format: rmarkdown::html_vignette + fig_caption: yes + toc: true +pkgdown: + as_is: true vignette: > %\VignetteIndexEntry{multiple_datasets} %\VignetteEngine{knitr::rmarkdown}