Skip to content

Commit

Permalink
- fix #75 ensure columns not rm in rating subset generation, and that…
Browse files Browse the repository at this point in the history
… review data cols are all re-nested

#102 add note in documentation explaining data requirements
  • Loading branch information
egouldo committed Sep 10, 2024
1 parent 416ff44 commit 5391322
Showing 1 changed file with 28 additions and 36 deletions.
64 changes: 28 additions & 36 deletions R/generate_rating_subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,57 +10,50 @@
#' @family targets-pipeline functions
#' @details
#' To be executed on a ManyEcoEvo dataframe after the `compute_MA_inputs()` function. This function generates two subsets of data based on the peer review ratings for both complete and partial exclusion datasets for both `yi` and `Zr` estimates. The subsets are generated based on the `PublishableAsIs` column in the `data` list-column. The subsets are named `data_flawed` and `data_flawed_major` and are created by filtering out the data points with the `PublishableAsIs` values of `"deeply flawed and unpublishable"` and `"publishable with major revision"` respectively. The `diversity_data` list-column is also updated to reflect the new subsets of data.
#'
#' Note, This function expects that within the list-column `data`, there is a list-column `review_data` containing the columns `ReviewerId`, `RateAnalysis`, `PublishableAsIs`, and `PriorBelief`.
#' @import dplyr
#' @importFrom purrr map2 map
#' @importFrom forcats fct_relevel as_factor
#' @importFrom tidyr unnest pivot_longer nest
#' @importFrom stringr str_detect
generate_rating_subsets <- function(ManyEcoEvo) {

out <- ManyEcoEvo %>%
filter(exclusion_set == "complete" | exclusion_set == "partial") %>%
mutate(
data =
map(data,
.f = ~ .x %>%
unnest(review_data) %>%
select(
Zr, VZr,
id_col,
PublishableAsIs,
ReviewerId,
TeamIdentifier,
RateAnalysis,
mixed_model
) %>%
mutate(
PublishableAsIs =
forcats::as_factor(PublishableAsIs) %>%
.f = ~ .x %>%
unnest(review_data) %>%
mutate(
PublishableAsIs =
forcats::as_factor(PublishableAsIs) %>%
forcats::fct_relevel(c(
"deeply flawed and unpublishable",
"publishable with major revision",
"publishable with minor revision",
"publishable as is"
))
)
)
)
) %>%
mutate(
rm_flawed =
map(data,
.f = ~ .x %>%
group_by(PublishableAsIs, id_col) %>%
count() %>%
filter(str_detect(PublishableAsIs, pattern = "flawed")) %>%
pull(id_col)
.f = ~ .x %>%
group_by(PublishableAsIs, id_col) %>%
count() %>%
filter(str_detect(PublishableAsIs, pattern = "flawed")) %>%
pull(id_col)
),
rm_flawed_major =
map(data,
.f = ~ .x %>%
group_by(PublishableAsIs, id_col) %>%
count() %>%
filter(str_detect(PublishableAsIs, pattern = "flawed|major")) %>%
pull(id_col)
.f = ~ .x %>%
group_by(PublishableAsIs, id_col) %>%
count() %>%
filter(str_detect(PublishableAsIs, pattern = "flawed|major")) %>%
pull(id_col)
)
) %>%
mutate(
Expand All @@ -83,11 +76,12 @@ generate_rating_subsets <- function(ManyEcoEvo) {
values_to = "data"
) %>%
select(-starts_with("rm_")) %>%
mutate(data = map(data,
.f = ~ group_by(.x, id_col) %>%
nest(review_data = c(ReviewerId, RateAnalysis, PublishableAsIs)) %>%
ungroup()
)) %>%
mutate(data =
map(data,
.f = ~ group_by(.x, id_col) %>%
nest(review_data = c(ReviewerId, RateAnalysis, PublishableAsIs, PriorBelief)) %>%
ungroup()
)) %>%
mutate(
diversity_data =
map2(
Expand All @@ -96,15 +90,13 @@ generate_rating_subsets <- function(ManyEcoEvo) {
.f = ~ semi_join(.x, .y, by = join_by(id_col)) %>% distinct()
)
)

# DON"T FORGET WE NEED TO RE DO THE DIVERSITY DATA!! TO DEAL WITH REMOVED DATA POINTS!

# THEN BIND ROWS WITH PREVIOUS DATASETS

# Bind new subsets with input data
out <- bind_rows(
ManyEcoEvo %>%
mutate(publishable_subset = "All"),
out
)

return(out)
}

0 comments on commit 5391322

Please sign in to comment.