Merge branch 'v2' of https://github.com/ropensci/skimr into v2-print

Conflicts: tests/testthat/test-dplyr.R tests/testthat/test-skim_print.R
ropensci · Mar 23, 2019 · 1843865 · 1843865
2 parents 675ef7f + fd0bb47
commit 1843865
Show file tree

Hide file tree

Showing 27 changed files with 262 additions and 120 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,7 +20,9 @@ Authors@R: c(
   person("Mikko","Korpela", email="mvkorpel@iki.fi", role='ctb'),
   person("Jennifer","Thompson", email="thompson.jennifer@gmail.com", role='ctb'),
   person("Harris","McGehee", email="mcgehee.harris@gmail.com", role='ctb'),
-  person("Roepke","Mark",email="mroepke5@gmail.com",role='ctb')
+  person("Roepke","Mark",email="mroepke5@gmail.com",role='ctb'),
+  person("Patrick", "Kennedy", email="pkqstr@protonmail.com", role='ctb'),
+  person("Daniel", "Possenriede", email="possenriede@gmail.com", role='ctb')
   )
 Description: A simple to use summary function that can be used with pipes
     and displays nicely in the console. The default summary statistics may be

diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,23 @@ skimr 2.0.0 (2019-xx-xx)
 ### MINOR IMPROVEMENTS
   * Change top_counts to use useNA = "no".
 
+skimr 1.0.6 (xxxx-xx-xx)
+========================
+### BUG FIXES
+  * Fix issue where skim_tee() was not respecting ... options.
+  * Fix issue where all NA character vectors were not returning NA for
+    max_char() and min_char().
+
+skimr 1.0.5 (2019-01-05)
+========================
+This is likely to be the last release of skimr version 1.  Version 2
+has major changes to the API. Users should review and prepare for 
+those changes now. 
+
+### BUG FIXES
+  * Fix issue where multibyte characters were causing an error.
+  * Fix problem in which purrr cannot find mean.default.
+
 skimr 1.0.4 (2018-01-12)
 ========================
 This is likely to be the last release of skimr version 1.  Version 2

diff --git a/R/skim.R b/R/skim.R
@@ -75,7 +75,7 @@ skim <- skim_with()
 #' @param skim_fun The skim function used.
 #' @export
 skim_tee <- function(.data, ..., skim_fun = skim) {
-  skimmed <- skim_fun(.data)
+  skimmed <- skim_fun(.data, ...)
   print(skimmed)
   invisible(.data)
 }
diff --git a/R/skim_print.R b/R/skim_print.R
@@ -62,22 +62,25 @@ print.skim_list <- function(x, n = Inf, width = Inf, n_extra = NULL, ...) {
 #' @describeIn print Print method for a `summary_skim_df` object.
 #' @export
 print.summary_skim_df <- function(x, ...) {
-  cat("Data summary  \n")
+  cat(paste0(cli::rule(line = 1, left = "Data Summary", width = 40), "\n"))
   print(build_summary_string(x))
 }
 
 
 build_summary_string <- function(x) {
   df_name <- ifelse(x$df_name %in% c("`.`", ".data"), "Piped data", x$df_name)
+  df_name <- gsub("`", "", df_name)
+  df_name <- ifelse(nchar(df_name) > 25, paste0(substring(df_name, 1, 25), "..."), df_name)
   groups <- ifelse(is.null(x$possible_groups), "None", paste0(x$possible_groups, collapse = ", "))
-  summary <- data.frame("Values" = c(
-    df_name, x$n_rows, x$n_cols, "",
-    x$type_frequencies$n, groups
+  types <- paste0("  ",x$type_frequencies$type)
+  summary <- data.frame("Value" = c(
+    df_name, x$n_rows, x$n_cols, "", "",
+    x$type_frequencies$n, "", groups, ""
   ))
   row.names(summary) <- c(
-    "Name", "Number of rows ", "Number of columns ",
-    "Column type frequency ", x$type_frequencies$type,
-    "Group variables"
+    "Name", "Number of rows ", "Number of columns "," ",
+    "Column type frequency: ", types,"  ",
+    "Group variables", "   "
   )
   summary
 }

diff --git a/R/skim_with.R b/R/skim_with.R
@@ -282,6 +282,7 @@ build_results <- function(skimmed, data, data_cols, groups, new_names, delim) {
     )
     tidyr::unnest(out)
   } else {
+    new_names <- c(groups, new_names)
     tibble::tibble(
       skim_variable = data_cols,
       !!!purrr::set_names(skimmed, new_names)

diff --git a/R/stats.R b/R/stats.R
@@ -151,6 +151,7 @@ n_empty <- function(x) {
 #'   character vector.
 #' @export
 min_char <- function(x) {
+  if(all(is.na(x))) return(NA)
   characters <- nchar(x, allowNA = TRUE)
   min(characters, na.rm = TRUE)
 }
@@ -159,6 +160,7 @@ min_char <- function(x) {
 #'   character vector.
 #' @export
 max_char <- function(x) {
+  if(all(is.na(x))) return(NA)
   characters <- nchar(x, allowNA = TRUE)
   max(characters, na.rm = TRUE)
 }

diff --git a/codemeta.json b/codemeta.json
@@ -127,6 +127,18 @@
       "givenName": "Roepke",
       "familyName": "Mark",
       "email": "mroepke5@gmail.com"
+    },
+    {
+      "@type": "Person",
+      "givenName": "Patrick",
+      "familyName": "Kennedy",
+      "email": "pkqstr@protonmail.com"
+    },
+    {
+      "@type": "Person",
+      "givenName": "Daniel",
+      "familyName": "Possenriede",
+      "email": "possenriede@gmail.com"
     }
   ],
   "copyrightHolder": [
@@ -236,7 +248,7 @@
         "name": "Comprehensive R Archive Network (CRAN)",
         "url": "https://cran.r-project.org"
       },
-      "sameAs": "https://github.com/tidyverse/dplyr"
+      "sameAs": "https://CRAN.R-project.org/package=dplyr"
     },
     {
       "@type": "SoftwareApplication",
@@ -361,7 +373,7 @@
   ],
   "releaseNotes": "https://github.com/elinw/skimr/blob/master/NEWS.md",
   "readme": "https://github.com/elinw/skimr/blob/master/README.md",
-  "fileSize": "2320.379KB",
+  "fileSize": "2388.553KB",
   "contIntegration": [
     "https://travis-ci.org/ropenscilabs/skimr",
     "https://codecov.io/gh/ropenscilabs/skimr"
@@ -371,5 +383,13 @@
     "url": "https://github.com/ropensci/onboarding/issues/175",
     "provider": "http://ropensci.org"
   },
-  "keywords": ["unconf17", "r", "summary-statistics", "ropensci"]
+  "keywords": [
+    "unconf17",
+    "r",
+    "summary-statistics",
+    "ropensci",
+    "unconf",
+    "r-package",
+    "rstats"
+  ]
 }
diff --git a/tests/testthat/dplyr/arrange.txt b/tests/testthat/dplyr/arrange.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     4
+
+Column type frequency:       
+  factor                    1
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable missing complete     n ordered n_unique top_counts               
 1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50

diff --git a/tests/testthat/dplyr/filter-no-skim.txt b/tests/testthat/dplyr/filter-no-skim.txt
@@ -1,14 +1,4 @@
-Data summary  
-                       Values
-Name                   `iris`
-Number of rows            150
-Number of columns           5
-Column type frequency        
-numeric                     4
-Group variables          None
-── Variable type: numeric ──────────────────────────────────────────────────────────────────────────
-  skim_variable missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist 
-1 Sepal.Length        0      150   150  5.84 0.828   4.3   5.1  5.8    6.4   7.9 ▆▇▇▅▂
-2 Sepal.Width         0      150   150  3.06 0.436   2     2.8  3      3.3   4.4 ▁▆▇▂▁
-3 Petal.Length        0      150   150  3.76 1.77    1     1.6  4.35   5.1   6.9 ▇▁▆▇▂
-4 Petal.Width         0      150   150  1.20 0.762   0.1   0.3  1.3    1.8   2.5 ▇▁▇▅▃
+# A tibble: 0 x 16
+# … with 16 variables: skim_type <chr>, skim_variable <chr>, missing <int>, complete <int>,
+#   n <int>, ordered <lgl>, n_unique <int>, top_counts <chr>, mean <dbl>, sd <dbl>, p0 <dbl>,
+#   p25 <dbl>, p50 <dbl>, p75 <dbl>, p100 <dbl>, hist <chr>
diff --git a/tests/testthat/dplyr/filter-skim.txt b/tests/testthat/dplyr/filter-skim.txt
@@ -1,11 +1,14 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-numeric                     4
+
+Column type frequency:       
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────
   skim_variable missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist 
 1 Sepal.Length        0      150   150  5.84 0.828   4.3   5.1  5.8    6.4   7.9 ▆▇▇▅▂

diff --git a/tests/testthat/dplyr/mutate-skim.txt b/tests/testthat/dplyr/mutate-skim.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     4
+
+Column type frequency:       
+  factor                    1
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable missing complete     n ordered n_unique top_counts               
 1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50

diff --git a/tests/testthat/dplyr/select-skim.txt b/tests/testthat/dplyr/select-skim.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     4
+
+Column type frequency:       
+  factor                    1
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable
 1 Species      

diff --git a/tests/testthat/dplyr/slice.txt b/tests/testthat/dplyr/slice.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     2
+
+Column type frequency:       
+  factor                    1
+  numeric                   2
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable missing complete     n ordered n_unique top_counts               
 1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50

diff --git a/tests/testthat/print/default.txt b/tests/testthat/print/default.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     4
+
+Column type frequency:       
+  factor                    1
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable missing complete     n ordered n_unique top_counts               
 1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50

diff --git a/tests/testthat/print/focus.txt b/tests/testthat/print/focus.txt
@@ -1,12 +1,15 @@
-Data summary  
-                       Values
-Name                   `iris`
+── Data Summary ────────────────────────
+                        Value
+Name                     iris
 Number of rows            150
 Number of columns           5
-Column type frequency        
-factor                      1
-numeric                     4
+
+Column type frequency:       
+  factor                    1
+  numeric                   4
+
 Group variables          None
+
 ── Variable type: factor ───────────────────────────────────────────────────────────────────────────
   skim_variable missing
 1 Species             0

diff --git a/tests/testthat/print/groups.txt b/tests/testthat/print/groups.txt
@@ -1,11 +1,14 @@
-Data summary  
-                                                 Values
-Name                   `dplyr::group_by(iris, Species)`
-Number of rows                                      150
-Number of columns                                     5
-Column type frequency                                  
-numeric                                               4
-Group variables                                 Species
+── Data Summary ────────────────────────
+                                               Value
+Name                    dplyr::group_by(iris, Spe...
+Number of rows                                   150
+Number of columns                                  5
+
+Column type frequency:                              
+  numeric                                          4
+
+Group variables                              Species
+
 ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────
    skim_variable Species    missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist 
  * <chr>         <fct>        <int>    <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>

diff --git a/tests/testthat/print/knit_print-summary.txt b/tests/testthat/print/knit_print-summary.txt
@@ -1,9 +1,12 @@
-|                      |       |
-|:---------------------|:------|
-|Name                  |`iris` |
-|Number of rows        |150    |
-|Number of columns     |5      |
-|Column type frequency |       |
-|factor                |1      |
-|numeric               |4      |
-|Group variables       |None   |
+|                       |     |
+|:----------------------|:----|
+|Name                   |iris |
+|Number of rows         |150  |
+|Number of columns      |5    |
+|                       |     |
+|Column type frequency: |     |
+|factor                 |1    |
+|numeric                |4    |
+|                       |     |
+|Group variables        |None |
+|                       |     |
diff --git a/tests/testthat/print/knit_print.txt b/tests/testthat/print/knit_print.txt
@@ -1,13 +1,16 @@
 
-|                      |       |
-|:---------------------|:------|
-|Name                  |`iris` |
-|Number of rows        |150    |
-|Number of columns     |5      |
-|Column type frequency |       |
-|factor                |1      |
-|numeric               |4      |
-|Group variables       |None   |
+|                       |     |
+|:----------------------|:----|
+|Name                   |iris |
+|Number of rows         |150  |
+|Number of columns      |5    |
+|                       |     |
+|Column type frequency: |     |
+|factor                 |1    |
+|numeric                |4    |
+|                       |     |
+|Group variables        |None |
+|                       |     |
 
 
 **Variable type: factor**