Skip to content

Commit

Permalink
Merge pull request #258 from njtierney/speed-up-miss-var-summary
Browse files Browse the repository at this point in the history
  • Loading branch information
njtierney authored May 19, 2020
2 parents 7b9598e + 4e02d37 commit d4349a5
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: naniar
Type: Package
Title: Data Structures, Summaries, and Visualisations for Missing Data
Version: 0.5.1
Version: 0.5.1.9000
Authors@R: c(
person("Nicholas", "Tierney",
role = c("aut", "cre"),
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# naniar 0.5.1.9000

## Minor Changes

- Sped up `miss_var_summary()`, `miss_var_table()`, and `prop_miss_var()` by
replacing per-column `purrr` mapping with vectorised `colSums(is.na(data))`.

# naniar 0.5.1 (2020/04/10) "Uncle Andrew's Applewood Wardrobe"

## Minor Changes
Expand Down
10 changes: 6 additions & 4 deletions R/miss-x-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ miss_var_summary.default <- function(data,
add_cumsum = FALSE,
...) {

res <- purrr::map_dfc(data, n_miss) %>%
tidyr::gather(key = "variable", value = "n_miss") %>%
dplyr::mutate(pct_miss = (n_miss / nrow(data) * 100))
col_n_miss <- colSums(is.na(data))
col_pct_miss <- colMeans(is.na(data)) * 100

res <- tibble::tibble(variable = names(col_n_miss),
n_miss = as.integer(col_n_miss),
pct_miss = as.numeric(col_pct_miss))

if (add_cumsum) {
res <- res %>% dplyr::mutate(n_miss_cumsum = cumsum(n_miss))
Expand All @@ -67,7 +70,6 @@ miss_var_summary.default <- function(data,

return(res)


}

#' @export
Expand Down
6 changes: 3 additions & 3 deletions R/miss-x-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ miss_var_table <- function(data){

miss_var_table.default <- function(data){

purrr::map_dfc(data, ~n_miss(.)) %>%
tidyr::gather(key = "variable",
value = "n_miss_in_var") %>%
miss_var_summary(data) %>%
dplyr::rename(n_miss_in_var = n_miss) %>%
dplyr::select(-pct_miss) %>%
dplyr::group_by(n_miss_in_var) %>%
dplyr::tally() %>%
dplyr::rename(n_vars = n) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/prop-pct-var-case-miss-complete.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ prop_miss_var <- function(data){
test_if_dataframe(data)

# find the proportion of variables that contain (any) missing values
mean(purrr::map_lgl(data, anyNA))
mean(colSums(is.na(data)) > 0)

} # end function

Expand Down

0 comments on commit d4349a5

Please sign in to comment.