Skip to content

Commit

Permalink
Merge pull request #121 from egouldo/118-log-transform-yi
Browse files Browse the repository at this point in the history
118 log transform yi
  • Loading branch information
egouldo authored Aug 28, 2024
2 parents cfc17ed + 807c548 commit 98ae1c1
Show file tree
Hide file tree
Showing 147 changed files with 4,425 additions and 2,301 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ManyEcoEvo
Title: Meta-analyse data from 'Many-Analysts' style studies
Version: 2.4.2
Version: 2.7.0
Authors@R: c(
person("Elliot", "Gould", , "elliot.gould@unimelb.edu.au", role = c("aut", "cre"),
comment = c(ORCID = "https://orcid.org/0000-0002-6585-538X")),
Expand Down
2 changes: 1 addition & 1 deletion ManyEcoEvo.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace,vignette
PackageRoxygenize: rd,collate,namespace
114 changes: 99 additions & 15 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export(fit_metafor_uni)
export(fit_multivar_MA)
export(fit_sorensen_glm)
export(fit_uni_mixed_effects)
export(folded_params)
export(generate_collinearity_subset)
export(generate_exclusion_subsets)
export(generate_expertise_subsets)
Expand All @@ -59,6 +60,8 @@ export(identity_back)
export(inverse_back)
export(log_back)
export(log_transform)
export(log_transform_response)
export(log_transform_yi)
export(logit_back)
export(make_param_table)
export(make_viz)
Expand All @@ -85,8 +88,10 @@ export(preprocess_prediction_files)
export(preprocess_updated_prediction_files)
export(probit_back)
export(read_submission_data)
export(rename_prediction_cols)
export(rm_inf_na)
export(run_model_checks)
export(split_yi_subsets)
export(square_back)
export(square_root_back)
export(standardise_response)
Expand All @@ -106,72 +111,151 @@ export(summarise_study)
export(summarise_variable_counts)
export(validate_predictions)
export(validate_predictions_df_blue_tit)
export(validate_predictions_df_euc)
import(NatParksPalettes)
import(broom)
import(broom.mixed)
import(cli)
export(variance_box_cox)
import(dplyr)
import(forcats)
import(ggbeeswarm)
import(ggforestplot)
import(ggplot2)
import(lme4)
import(metafor)
import(purrr)
import(recipes)
import(rlang)
import(see)
import(stringr)
import(tidyr)
importFrom(EnvStats,stat_n_text)
importFrom(broom.mixed,tidy)
importFrom(NatParksPalettes,scale_color_natparks_d)
importFrom(betapart,beta.pair)
importFrom(broom,tidy)
importFrom(cli,cli_abort)
importFrom(cli,cli_alert)
importFrom(cli,cli_alert_danger)
importFrom(cli,cli_alert_info)
importFrom(cli,cli_alert_success)
importFrom(cli,cli_alert_warning)
importFrom(cli,cli_bullets)
importFrom(cli,cli_h1)
importFrom(cli,cli_h2)
importFrom(cli,cli_ol)
importFrom(cli,cli_warn)
importFrom(cli,style_italic)
importFrom(data.table,setnames)
importFrom(forcats,as_factor)
importFrom(forcats,fct_relevel)
importFrom(forcats,fct_reorder)
importFrom(fs,file_exists)
importFrom(ggbeeswarm,geom_quasirandom)
importFrom(ggforestplot,theme_forest)
importFrom(ggplot2,aes)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_line)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_pointrange)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,guides)
importFrom(ggplot2,labs)
importFrom(ggplot2,theme)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(lme4,lmer)
importFrom(magrittr,"%>%")
importFrom(metafor,rma.mv)
importFrom(metaviz,viz_funnel)
importFrom(orchaRd,i2_ml)
importFrom(parameters,parameters)
importFrom(parsnip,fit)
importFrom(parsnip,linear_reg)
importFrom(performance,performance)
importFrom(pointblank,action_levels)
importFrom(pointblank,col_exists)
importFrom(pointblank,col_is_character)
importFrom(pointblank,col_is_integer)
importFrom(pointblank,col_is_numeric)
importFrom(pointblank,col_vals_in_set)
importFrom(pointblank,col_vals_not_null)
importFrom(pointblank,create_agent)
importFrom(pointblank,expect_col_exists)
importFrom(pointblank,expect_col_is_character)
importFrom(pointblank,expect_col_is_numeric)
importFrom(pointblank,expect_col_vals_in_set)
importFrom(pointblank,has_columns)
importFrom(pointblank,stop_if_not)
importFrom(pointblank,test_col_exists)
importFrom(pointblank,test_col_vals_gte)
importFrom(pointblank,vars)
importFrom(pointblank,warn_on_fail)
importFrom(purrr,discard)
importFrom(purrr,exec)
importFrom(purrr,flatten_dbl)
importFrom(purrr,is_scalar_vector)
importFrom(purrr,keep)
importFrom(purrr,keep_at)
importFrom(purrr,list_c)
importFrom(purrr,list_flatten)
importFrom(purrr,list_rbind)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
importFrom(purrr,map_if)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,pluck)
importFrom(purrr,pmap)
importFrom(purrr,possibly)
importFrom(purrr,reduce)
importFrom(purrr,reduce2)
importFrom(purrr,set_names)
importFrom(purrr,simplify)
importFrom(purrr,transpose)
importFrom(readr,read_csv)
importFrom(recipes,juice)
importFrom(recipes,prep)
importFrom(recipes,recipe)
importFrom(recipes,step_mutate)
importFrom(recipes,step_naomit)
importFrom(recipes,tidy)
importFrom(recipes,update_role)
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,as_function)
importFrom(rlang,as_string)
importFrom(rlang,caller_env)
importFrom(rlang,enquo)
importFrom(rlang,ensym)
importFrom(rlang,exec)
importFrom(rlang,expr)
importFrom(rlang,exprs)
importFrom(rlang,f_lhs)
importFrom(rlang,inject)
importFrom(rlang,is_call)
importFrom(rlang,is_list)
importFrom(rlang,is_na)
importFrom(rlang,is_null)
importFrom(rlang,na_chr)
importFrom(rlang,new_formula)
importFrom(sae,bxcx)
importFrom(see,geom_jitter2)
importFrom(see,scale_fill_material_d)
importFrom(see,theme_modern)
importFrom(stringr,str_detect)
importFrom(stringr,str_remove)
importFrom(stringr,str_split)
importFrom(stringr,str_starts)
importFrom(tibble,as_tibble)
importFrom(tibble,as_tibble_row)
importFrom(tibble,column_to_rownames)
importFrom(tibble,enframe)
importFrom(tibble,rownames_to_column)
importFrom(tibble,tibble)
importFrom(tidyr,any_of)
importFrom(tidyr,drop_na)
importFrom(tidyr,hoist)
importFrom(tidyr,nest)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate)
importFrom(tidyr,tibble)
importFrom(tidyr,unite)
importFrom(tidyr,unnest)
importFrom(tidyr,unnest_longer)
importFrom(tidyselect,all_of)
importFrom(tidyselect,where)
importFrom(timetk,step_box_cox)
importFrom(workflows,add_model)
importFrom(workflows,add_recipe)
importFrom(workflows,extract_fit_parsnip)
importFrom(workflows,workflow)
32 changes: 29 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
# ManyEcoEvo 2.4.2
# ManyEcoEvo (development version)

# ManyEcoEvo 2.4.1
<!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->

* Initial CRAN submission.
- #118 docs: Add explanation about updated behaviour when `estimate_type` is missing in `ManyEcoEvo` dataframe
- #118 build: devtools::document()

# ManyEcoEvo 2.6.0

- Update arg supply to targets call on prepare_response_variables() after #118 updates
- #118 add pmap internal helper function for differential application of transformation / standardisation in `standardise_response()`
- #118 delete old pmap helper function
- `dat` to `data` to help with auto-matching in pmap within prepare_response_variables() wrapper #118
- ensure all family fns have ... arg for pmap application in prepare_response_variables() since all fns have different argument lengths and names
- accidentally deleted when upgrading for #118, have added creation of transform_datasets tibbles for all cases now, and then these will apply the appropriate functions in final code chunk at end
- #118 ensure application of Z_VZ_preds takes the generalised colnames yi, yi_se instead of using hard-coded dataset application #97
- #118 call new arg `dataset_log_transform` in fn to log-transform outcomes for euc yi analysis
- #118 add log-transformation equivalent to `standardise_response()` and `process_response()`
- #102 add function documentation, including examples
- #118 extract `lower` and `upper` transformed vals in line with addition of `log_transform_response()` / changes to `standardise_response()`
- #116 check appropriate required variable (i.e. function needs `back_transformed_data`, but checked for `augmented_data` in `dat` arg, so it wouldn't throw the required error because `augmented_data` was present in `dat`)
- #102 add import, return, and see also roxygen doc tags, replace note with details tag, rename fn doc title
- #116 update argument checks conditional expression
- #118 match output to `log_transform_yi()` (now returns additional cols `lower` and `upper`, not only `c("Z","VZ")`)
- #118 match process to `log_transform_yi()` and #97 generalise processing to both euc/bt datasets without hard-coding dataset names in fns, and remove associated dataset-specific argument checking #116
- #118 adapt response variable preparation to accept additional argument `dataset_log_transform` apply argument checks #116, add roxygen param #102
- #118 adapt response variable processing to accept either/or/none for dataset standardisation/log-transformation.
- equivalent to `pred_to_z()`
- #102 write documentation
- #102 add import tags for `log_transform()` and link to equivalent functions, apply default argument values / checks
- #97 rename out argument
1 change: 0 additions & 1 deletion R/ManyEcoEvo-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"_PACKAGE"

## usethis namespace: start
#' @import rlang
#' @importFrom glue glue
#' @importFrom lifecycle deprecated
## usethis namespace: end
Expand Down
2 changes: 1 addition & 1 deletion R/anonymise_teams.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @return A `df` with anonymised values of `id_col` based on the `New_Identifier` column of `lookup`
#' @export
#' @importFrom pointblank col_vals_not_null
#' @import tidyr
#' @importFrom tidyr separate unite
#' @import dplyr
anonymise_teams <- function(df, lookup) { # TODO actually... this is anonymise_id_col()
df %>%
Expand Down
124 changes: 116 additions & 8 deletions R/apply_VZ_exclusions.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,130 @@
#' Apply VZ exclusion to a data-frame containing list-columns of yi subsets
#'
#' @param df A dataframe of yi data subsets generated from `generate_yi_subsets\(\)`.
#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`.
#'
#' @param df A dataframe of yi data subsets generated by [generate_yi_subsets()] or [split_yi_subsets()].
#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`, else a named list of numeric values, where the names are the dataset names and the values are the `VZ_cutoff`s for each `dataset` in `df`.
#' @param VZ_colname Either a character vector of length 1, the name of the column in the dataframes stored in `df`'s list-column `data` that contains the VZ values, or else a named list of character values, where the names are the dataset names and the values are the `VZ_colname`s for each `dataset` in `df`.
#' @return A dataframe of yi subsets, whose extreme values of VZ have been removed.
#' @export
#' @import dplyr
#' @importFrom purrr map map2
#' @importFrom pointblank col_exists
#' @importFrom cli cli_alert_warning
#' @seealso Applies [exclude_extreme_VZ()] to each dataframe in the list-column `data` of `df`.
#' @family Multi-dataset Wrapper Functions
apply_VZ_exclusions <- function(df = data.frame(), VZ_cutoff = numeric(1L)) {
pointblank::col_exists(df, columns = c("data", "diversity_data"))

#' @details
#' `df` must contain the columns `"data"`, `"diversity_data"` and `"dataset"`.
#' If only one value of `VZ_colname` and `VZ_cutoff` is supplied, it will be recycled to match the number of datasets in `df`.
#'
#' If a named list is supplied for `VZ_colname` and `VZ_cutoff`, the names must match the dataset names in `df`.
#' @examples
#' data(ManyEcoEvo_yi)
#' ManyEcoEvo_yi %>%
#' prepare_response_variables(
#' estimate_type = "yi",
#' param_table =
#' ManyEcoEvo:::analysis_data_param_tables,
#' dataset_standardise = "blue tit",
#' dataset_log_transform = "eucalyptus") %>%
#' generate_yi_subsets() %>%
#' apply_VZ_exclusions(VZ_colname =
#' list("eucalyptus" = "se_log",
#' "blue tit" = "VZ"),
#' VZ_cutoff = 3)
apply_VZ_exclusions <- function(df = data.frame(), VZ_colname, VZ_cutoff) {
  # ---- Argument Checking -----
  pointblank::col_exists(df, columns = c("data", "diversity_data", "dataset"))

  # When names are supplied they must line up with the values of `df$dataset`.
  if (!is.null(names(VZ_colname))) {
    pointblank::expect_col_vals_make_set(
      object = df,
      columns = dataset,
      set = names(VZ_colname)
    )
  }

  if (!is.null(names(VZ_cutoff))) {
    pointblank::expect_col_vals_make_set(
      object = df,
      columns = dataset,
      set = names(VZ_cutoff)
    )
  }

  cli::cli_h1("Applying VZ exclusions")

  datasets <- unique(df$dataset)

  # Validate one of `VZ_colname` / `VZ_cutoff` and turn it into a list of
  # two-sided formulas (`<dataset name> ~ <value>`) that can be spliced into
  # `case_match()` below. `VZ_colname` and `VZ_cutoff` follow exactly the same
  # rules, so both go through this single helper.
  #
  # x          The user-supplied argument value (list or atomic vector).
  # is_valid   Type predicate applied to `x`, or to each element if a list.
  # type_label Human-readable type name used in error messages.
  # arg        Name of the argument, used in messages.
  # call       Frame reported in errors; defaults to the calling function.
  build_match_formulae <- function(x, is_valid, type_label, arg,
                                   call = rlang::caller_env()) {
    if (is.list(x)) {
      if (!all(vapply(x, is_valid, logical(1L)))) {
        cli::cli_abort(
          "Every element of {.arg {arg}} must be {type_label}.",
          call = call
        )
      }
      # Without names there is nothing to match the values against.
      if (is.null(names(x))) {
        cli::cli_abort(
          "{.arg {arg}} must be a {.emph named} list when supplied as a list.",
          call = call
        )
      }
      return(map2(names(x), x, rlang::new_formula))
    }

    if (!is_valid(x)) {
      cli::cli_abort(
        "{.arg {arg}} must be {type_label}, or a named list of {type_label} values.",
        call = call
      )
    }

    if (length(x) == 1L && length(datasets) > 1L) {
      # A single value applies to every dataset.
      cli::cli_alert_warning("{.arg {arg}} = {.val {x}} was recycled to match the number of unique datasets in {.arg df}.")
      x <- rep(x, length(datasets))
    } else if (length(x) > 1L && length(x) != length(datasets)) {
      # Partial recycling would pair values with the wrong datasets (and
      # previously failed later inside `map2()`), so fail early and clearly.
      cli::cli_abort(
        "{.arg {arg}} must have length 1 or {length(datasets)} (the number of unique datasets in {.arg df}), not {length(x)}.",
        call = call
      )
    }

    # Unnamed vectors are matched positionally, in order of first appearance
    # of each dataset in `df$dataset`.
    map2(datasets, x, rlang::new_formula)
  }

  # ----- Create formulas for matching VZ_cutoff and VZ_colname to df$dataset -----
  formulae_match_VZ_colname <- build_match_formulae(
    VZ_colname,
    is_valid = is.character,
    type_label = "character",
    arg = "VZ_colname"
  )

  formulae_match_VZ_cutoff <- build_match_formulae(
    VZ_cutoff,
    is_valid = is.numeric,
    type_label = "numeric",
    arg = "VZ_cutoff"
  )

  # ----- Apply VZ exclusions -----
  df %>%
    ungroup() %>%
    mutate(
      # Look up the per-row column name / cutoff from the row's `dataset`;
      # rows whose dataset has no match get `NA`.
      VZ_colname_val = case_match(
        dataset,
        !!!formulae_match_VZ_colname,
        .default = NA
      ),
      VZ_cutoff_val = case_match(
        dataset,
        !!!formulae_match_VZ_cutoff,
        .default = NA
      ),
      data = pmap( # TODO check whether we should run on effects_analysis instead of data
        list(data, VZ_colname_val, VZ_cutoff_val),
        ~ exclude_extreme_VZ(df = ..1, VZ_colname = ..2, VZ_cutoff = ..3)
      ),
      # Keep only the diversity rows whose `id_col` survived the exclusion.
      diversity_data = map2(
        .x = diversity_data,
        .y = data,
        .f = ~ semi_join(.x, .y, by = "id_col")
      )
    ) %>%
    # The lookup columns are scaffolding only; drop them from the result.
    select(-VZ_colname_val, -VZ_cutoff_val)
}
Loading

0 comments on commit 98ae1c1

Please sign in to comment.