diff --git a/DESCRIPTION b/DESCRIPTION index e72945c7..f953c423 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: naniar Type: Package Title: Data Structures, Summaries, and Visualisations for Missing Data -Version: 0.5.1 +Version: 0.5.1.9000 Authors@R: c( person("Nicholas", "Tierney", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 0adf0286..9b901d15 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# naniar 0.5.1.9000 + +## Minor Changes + +- Improved code in `miss_var_summary()`, `miss_var_table()`, and + `prop_miss_var()`, improving speed. + # naniar 0.5.1 (2020/04/10) "Uncle Andrew's Applewood Wardrobe" ## Minor Changes diff --git a/R/miss-x-summary.R b/R/miss-x-summary.R index 17687a91..2e6e6e9e 100644 --- a/R/miss-x-summary.R +++ b/R/miss-x-summary.R @@ -53,9 +53,12 @@ miss_var_summary.default <- function(data, add_cumsum = FALSE, ...) { - res <- purrr::map_dfc(data, n_miss) %>% - tidyr::gather(key = "variable", value = "n_miss") %>% - dplyr::mutate(pct_miss = (n_miss / nrow(data) * 100)) + col_n_miss <- colSums(is.na(data)) + col_pct_miss <- colMeans(is.na(data)) * 100 + + res <- tibble::tibble(variable = names(col_n_miss), + n_miss = as.integer(col_n_miss), + pct_miss = as.numeric(col_pct_miss)) if (add_cumsum) { res <- res %>% dplyr::mutate(n_miss_cumsum = cumsum(n_miss)) @@ -67,7 +70,6 @@ miss_var_summary.default <- function(data, return(res) - } #' @export diff --git a/R/miss-x-table.R b/R/miss-x-table.R index 0e81704c..c07d8a48 100644 --- a/R/miss-x-table.R +++ b/R/miss-x-table.R @@ -87,9 +87,9 @@ miss_var_table <- function(data){ miss_var_table.default <- function(data){ - purrr::map_dfc(data, ~n_miss(.)) %>% - tidyr::gather(key = "variable", - value = "n_miss_in_var") %>% + miss_var_summary(data) %>% + dplyr::rename(n_miss_in_var = n_miss) %>% + dplyr::select(-pct_miss) %>% dplyr::group_by(n_miss_in_var) %>% dplyr::tally() %>% dplyr::rename(n_vars = n) %>% diff --git a/R/prop-pct-var-case-miss-complete.R b/R/prop-pct-var-case-miss-complete.R index 2d10f871..963da164 100644 --- a/R/prop-pct-var-case-miss-complete.R +++ b/R/prop-pct-var-case-miss-complete.R @@ -26,7 +26,7 @@ prop_miss_var <- function(data){ test_if_dataframe(data) # find the proportion of variables that contain (any) missing values - mean(purrr::map_lgl(data, anyNA)) + mean(colSums(is.na(data)) > 0) } # end function