- fix #75 ensure columns not rm in rating subset generation, and that…

… review data cols are all re-nested #102 add note in documentation explaining data requirements
egouldo · Sep 10, 2024 · 5391322 · 5391322
1 parent 416ff44
commit 5391322
Showing 1 changed file with 28 additions and 36 deletions.
diff --git a/R/generate_rating_subsets.R b/R/generate_rating_subsets.R
@@ -10,57 +10,50 @@
 #' @family targets-pipeline functions
 #' @details 
 #' To be executed on a ManyEcoEvo dataframe after the `compute_MA_inputs()` function. This function generates two subsets of data based on the peer review ratings for both complete and partial exclusion datasets for both `yi` and `Zr` estimates. The subsets are generated based on the `PublishableAsIs` column in the `data` list-column. The subsets are named `data_flawed` and `data_flawed_major` and are created by filtering out the data points with the `PublishableAsIs` values of `"deeply flawed and unpublishable"` and `"publishable with major revision"` respectively. The `diversity_data` list-column is also updated to reflect the new subsets of data.
+#'  
+#' Note, This function expects that within the list-column `data`, there is a list-column `review_data` containing the columns `ReviewerId`, `RateAnalysis`, `PublishableAsIs`, and `PriorBelief`.
 #' @import dplyr
 #' @importFrom purrr map2 map
 #' @importFrom forcats fct_relevel as_factor
 #' @importFrom tidyr unnest pivot_longer nest
 #' @importFrom stringr str_detect
 generate_rating_subsets <- function(ManyEcoEvo) {
-
+  
   out <- ManyEcoEvo %>%
     filter(exclusion_set == "complete" | exclusion_set == "partial") %>%
     mutate(
       data =
         map(data,
-          .f = ~ .x %>%
-            unnest(review_data) %>%
-            select(
-              Zr, VZr,
-              id_col,
-              PublishableAsIs,
-              ReviewerId,
-              TeamIdentifier,
-              RateAnalysis,
-              mixed_model
-            ) %>%
-            mutate(
-              PublishableAsIs =
-                forcats::as_factor(PublishableAsIs) %>%
+            .f = ~ .x %>%
+              unnest(review_data) %>%
+              mutate(
+                PublishableAsIs =
+                  forcats::as_factor(PublishableAsIs) %>%
                   forcats::fct_relevel(c(
                     "deeply flawed and unpublishable",
                     "publishable with major revision",
                     "publishable with minor revision",
                     "publishable as is"
                   ))
-            )
+              )
         )
     ) %>%
     mutate(
       rm_flawed =
         map(data,
-          .f = ~ .x %>%
-            group_by(PublishableAsIs, id_col) %>%
-            count() %>%
-            filter(str_detect(PublishableAsIs, pattern = "flawed")) %>%
-            pull(id_col)
+            .f = ~ .x %>%
+              group_by(PublishableAsIs, id_col) %>%
+              count() %>%
+              filter(str_detect(PublishableAsIs, pattern = "flawed")) %>%
+              pull(id_col)
         ),
       rm_flawed_major =
         map(data,
-          .f = ~ .x %>%
-            group_by(PublishableAsIs, id_col) %>%
-            count() %>%
-            filter(str_detect(PublishableAsIs, pattern = "flawed|major")) %>%
-            pull(id_col)
+            .f = ~ .x %>%
+              group_by(PublishableAsIs, id_col) %>%
+              count() %>%
+              filter(str_detect(PublishableAsIs, pattern = "flawed|major")) %>%
+              pull(id_col)
         )
     ) %>%
     mutate(
@@ -83,11 +76,12 @@ generate_rating_subsets <- function(ManyEcoEvo) {
       values_to = "data"
     ) %>%
     select(-starts_with("rm_")) %>%
-    mutate(data = map(data,
-      .f = ~ group_by(.x, id_col) %>%
-        nest(review_data = c(ReviewerId, RateAnalysis, PublishableAsIs)) %>%
-        ungroup()
-    )) %>%
+    mutate(data = 
+             map(data,
+                 .f = ~ group_by(.x, id_col) %>%
+                   nest(review_data = c(ReviewerId, RateAnalysis, PublishableAsIs, PriorBelief)) %>%
+                   ungroup()
+             )) %>%
     mutate(
       diversity_data =
         map2(
@@ -96,15 +90,13 @@ generate_rating_subsets <- function(ManyEcoEvo) {
           .f = ~ semi_join(.x, .y, by = join_by(id_col)) %>% distinct()
         )
     )
-
-  # DON"T FORGET WE NEED TO RE DO THE DIVERSITY DATA!! TO DEAL WITH REMOVED DATA POINTS!
-
-  # THEN BIND ROWS WITH PREVIOUS DATASETS
+
+  # Bind new subsets with input data
   out <- bind_rows(
     ManyEcoEvo %>%
       mutate(publishable_subset = "All"),
     out
   )
-
+  
   return(out)
 }