-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #121 from egouldo/118-log-transform-yi
118 log transform yi
- Loading branch information
Showing
147 changed files
with
4,425 additions
and
2,301 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,31 @@ | ||
# ManyEcoEvo 2.4.2 | ||
# ManyEcoEvo (development version) | ||
|
||
# ManyEcoEvo 2.4.1 | ||
<!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit --> | ||
|
||
* Initial CRAN submission. | ||
- #118 docs: Add explanation about updated behaviour when `estimate_type` is missing in `ManyEcoEvo` dataframe | ||
- #118 build: devtools::document() | ||
|
||
# ManyEcoEvo 2.6.0 | ||
|
||
- Update arg supply to targets call on prepare_response_variables() after #118 updates | ||
- #118 add pmap internal helper function for differential application of transformation / standardisation in `standardise_response()` | ||
- #118 delete old pmap helper function | ||
- `dat` to `data` to help with auto-matching in pmap within prepare_response_variables() wrapper #118 | ||
- ensure all family fns have ... arg for pmap application in prepare_response_variables() since all fns have different argument lengths and names | ||
- accidentally deleted when upgrading for #118, have added creation of transform_datasets tibbles for all cases now, and then these will apply the appropriate functions in final code chunk at end | ||
- #118 ensure application of Z_VZ_preds takes the generalised colnames yi, yi_se instead of using hard-coded dataset application #97 | ||
- #118 call new arg `dataset_log_transform` in fn to log-transform outcomes for euc yi analysis | ||
- #118 add log-transformation equivalent to `standardise_response()` and `process_response()` | ||
- #102 add function documentation, including examples | ||
- #118 extract `lower` and `upper` transformed vals in line with addition of `log_transform_response()` / changes to `standardise_response()` | ||
- #116 check appropriate required variable (i.e. function needs `back_transformed_data`, but checked for `augmented_data` in `dat` arg, wouldn't throw required error because `augmented_data` was present in `dat`) | ||
- #102 add import, return, and see also roxygen doc tags, replace note with details tag, rename fn doc title | ||
- #116 update argument checks conditional expression | ||
- #118 match output to `log_transform_yi()` (now returns additional cols `lower` and `upper`, not only `c("Z","VZ")`) | ||
- #118 match process to `log_transform_yi()` and #97 generalise processing to both euc/bt datasets without hard-coding dataset names in fns, and remove associated dataset-specific argument checking #116 | ||
- #118 adapt response variable preparation to accept additional argument `dataset_log_transform` apply argument checks #116, add roxygen param #102 | ||
- #118 adapt response variable processing to accept either/or/none for dataset standardisation/log-transformation. | ||
- equivalent to `pred_to_z()` | ||
- #102 write documentation | ||
- #102 add import tags for `log_transform()` and link to equivalent functions, apply default argument values / checks | ||
- #97 rename out argument |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,130 @@ | ||
#' Apply VZ exclusion to a data-frame containing list-columns of yi subsets | ||
#' | ||
#' @param df A dataframe of yi data subsets generated from `generate_yi_subsets\(\)`. | ||
#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`. | ||
#' | ||
#' @param df A dataframe of yi data subsets generated by [generate_yi_subsets()] or [split_yi_subsets()]. | ||
#' @param VZ_cutoff A numeric vector of length 1, values equal to or greater than this value of VZ will be filtered out of the dataframes stored in `df`'s list-column `data`, else a named list of numeric values, where the names are the dataset names and the values are the `VZ_cutoff`s for each `dataset` in `df`. | ||
#' @param VZ_colname Either a character vector of length 1, the name of the column in the dataframes stored in `df`'s list-column `data` that contains the VZ values, or else a named list of character values, where the names are the dataset names and the values are the `VZ_colname`s for each `dataset` in `df`. | ||
#' @return A dataframe of yi subsets, whose extreme values of VZ have been removed. | ||
#' @export | ||
#' @import dplyr | ||
#' @importFrom purrr map map2 | ||
#' @importFrom pointblank col_exists | ||
#' @importFrom cli cli_alert_warning | ||
#' @seealso Applies [exclude_extreme_VZ()] to each dataframe in the list-column `data` of `df`. | ||
#' @family Multi-dataset Wrapper Functions | ||
apply_VZ_exclusions <- function(df = data.frame(), VZ_cutoff = numeric(1L)) { | ||
pointblank::col_exists(df, columns = c("data", "diversity_data")) | ||
|
||
#' @details | ||
#' `df` must contain the columns `"data"`, `"diversity_data"` and `"dataset"`. | ||
#' If only one value of `VZ_colname` and `VZ_cutoff` is supplied, it will be recycled to match the number of datasets in `df`. | ||
#' | ||
#' If a named list is supplied for `VZ_colname` and `VZ_cutoff`, the names must match the dataset names in `df`. | ||
#' @examples | ||
#' data(ManyEcoEvo_yi) | ||
#' ManyEcoEvo_yi %>% | ||
#' prepare_response_variables( | ||
#' estimate_type = "yi", | ||
#' param_table = | ||
#' ManyEcoEvo:::analysis_data_param_tables, | ||
#' dataset_standardise = "blue tit", | ||
#' dataset_log_transform = "eucalyptus") %>% | ||
#' generate_yi_subsets() %>% | ||
#' apply_VZ_exclusions(VZ_colname = | ||
#' list("eucalyptus" = "se_log", | ||
#' "blue tit" = "VZ"), | ||
#' VZ_cutoff = 3) | ||
apply_VZ_exclusions <- function(df = data.frame(), VZ_colname, VZ_cutoff) {
  # Purpose: for every row of `df`, filter the data frame held in the
  # list-column `data` with `exclude_extreme_VZ()`, using the VZ column name
  # and cutoff that belong to that row's `dataset`, then keep only the rows of
  # `diversity_data` whose `id_col` is still present in the filtered `data`.
  #
  # `VZ_colname` (character) and `VZ_cutoff` (numeric) may each be supplied as:
  #   * a single unnamed value, recycled to every dataset (with a warning);
  #   * an unnamed vector/list with one value per unique dataset, matched by
  #     position against `unique(df$dataset)`;
  #   * a named vector/list, matched to `df$dataset` by name.
  #
  # Returns `df` (ungrouped) with `data` and `diversity_data` replaced.

  # ---- Argument Checking -----
  pointblank::col_exists(df, columns = c("data", "diversity_data", "dataset"))

  # When names are supplied they must correspond to the values of `dataset`.
  if (!is.null(names(VZ_colname))) {
    pointblank::expect_col_vals_make_set(
      object = df,
      columns = dataset,
      set = names(VZ_colname)
    )
  }

  if (!is.null(names(VZ_cutoff))) {
    pointblank::expect_col_vals_make_set(
      object = df,
      columns = dataset,
      set = names(VZ_cutoff)
    )
  }

  cli::cli_h1("Applying VZ exclusions")

  datasets <- unique(df$dataset)

  # Local helper: validate one argument and turn it into a list of
  # `dataset ~ value` formulas suitable for splicing into `case_match()`.
  build_match_formulae <- function(x, arg_name, is_valid) {
    if (is.list(x)) {
      stopifnot(all(vapply(x, is_valid, logical(1L))))
    } else {
      stopifnot(is_valid(x))
    }

    if (is.null(names(x))) {
      # Unnamed input is matched by position, so only a single value can be
      # recycled safely.
      if (length(x) == 1L && length(datasets) > 1L) {
        cli::cli_alert_warning(
          "{.arg {arg_name}} = {.val {unlist(x)}} was recycled to match the number of unique datasets in {.arg df}."
        )
        x <- rep(x, length(datasets))
      }
      if (length(x) != length(datasets)) {
        stop(
          "`", arg_name, "` must have length 1, or one value per unique ",
          "dataset in `df` (", length(datasets), "), not ", length(x), ".",
          call. = FALSE
        )
      }
      keys <- datasets
    } else {
      # Named input (list OR atomic vector) is matched by name, never by
      # position.
      keys <- names(x)
    }

    purrr::map2(keys, unname(x), rlang::new_formula)
  }

  # ----- Create formulas for matching VZ_cutoff and VZ_colname to df$dataset -----
  formulae_match_VZ_colname <- build_match_formulae(
    VZ_colname, "VZ_colname", is.character
  )
  formulae_match_VZ_cutoff <- build_match_formulae(
    VZ_cutoff, "VZ_cutoff", is.numeric
  )

  # ----- Apply VZ exclusions -----
  df %>%
    ungroup() %>%
    mutate(
      VZ_colname_val = case_match(
        dataset,
        !!!formulae_match_VZ_colname,
        .default = NA
      ),
      VZ_cutoff_val = case_match(
        dataset,
        !!!formulae_match_VZ_cutoff,
        .default = NA
      ),
      # TODO check whether we should run on effects_analysis instead of data
      data = purrr::pmap(
        list(data, VZ_colname_val, VZ_cutoff_val),
        ~ exclude_extreme_VZ(df = ..1, VZ_colname = ..2, VZ_cutoff = ..3)
      ),
      # Keep diversity rows only for ids that survived the exclusion.
      diversity_data = purrr::map2(
        .x = diversity_data,
        .y = data,
        .f = ~ semi_join(.x, .y, by = "id_col")
      )
    ) %>%
    select(-VZ_colname_val, -VZ_cutoff_val)
}
Oops, something went wrong.