diff --git a/docs/CONDUCT.html b/docs/CONDUCT.html index 77bca0f0..4ba2dbf8 100644 --- a/docs/CONDUCT.html +++ b/docs/CONDUCT.html @@ -1,6 +1,6 @@ - +
@@ -8,18 +8,25 @@vignettes/exploring-imputed-values.Rmd
exploring-imputed-values.Rmd
impute_below
impute_below
imputes values below the minimum of the data, with some noise to reduce overplotting. How far below the minimum the data is imputed, and the amount of jitter, can be changed with the arguments prop_below
and jitter
.
library(dplyr)
+library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
@@ -125,12 +133,12 @@
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
-library(naniar)
+library(naniar)
airquality %>%
- impute_below_at(vars(Ozone)) %>%
- select(Ozone, Solar.R) %>%
- head()
+ impute_below_at(vars(Ozone)) %>%
+ select(Ozone, Solar.R) %>%
+ head()
#> Ozone Solar.R
#> 1 41.00000 190
#> 2 36.00000 118
@@ -157,10 +165,10 @@
If the impute_
functions are used as-is - e.g., impute_mean
, this will work on a single vector, but not a data.frame.
Some examples for impute_mean
are now given:
-impute_mean(oceanbuoys$air_temp_c) %>% head()
+impute_mean(oceanbuoys$air_temp_c) %>% head()
#> [1] 27.15 27.02 27.00 26.93 26.84 26.94
-impute_mean_at(oceanbuoys, .vars = vars(air_temp_c)) %>% head()
+impute_mean_at(oceanbuoys, .vars = vars(air_temp_c)) %>% head()
#> # A tibble: 6 x 8
#> year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
@@ -171,7 +179,7 @@
#> 5 1997 0 -110 27.6 26.8 76.4 -3.5 4.10
#> 6 1997 0 -110 27.8 26.9 76.7 -4.40 1.60
-impute_mean_if(oceanbuoys, .predicate = is.integer) %>% head()
+impute_mean_if(oceanbuoys, .predicate = is.integer) %>% head()
#> # A tibble: 6 x 8
#> year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
@@ -182,7 +190,7 @@
#> 5 1997 0 -110 27.6 26.8 76.4 -3.5 4.10
#> 6 1997 0 -110 27.8 26.9 76.7 -4.40 1.60
-impute_mean_all(oceanbuoys) %>% head()
+impute_mean_all(oceanbuoys) %>% head()
#> # A tibble: 6 x 8
#> year latitude longitude sea_temp_c air_temp_c humidity wind_ew wind_ns
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
@@ -202,7 +210,7 @@
Imputing values using simputation
We can impute the data using the easy-to-use simputation
package, and then track the missingness using bind_shadow
and add_label_shadow
:
-library(simputation)
+library(simputation)
#>
#> Attaching package: 'simputation'
#> The following object is masked from 'package:naniar':
@@ -210,46 +218,55 @@
#> impute_median
ocean_imp <- oceanbuoys %>%
bind_shadow() %>%
- impute_lm(air_temp_c ~ wind_ew + wind_ns) %>%
- impute_lm(humidity ~ wind_ew + wind_ns) %>%
- impute_lm(sea_temp_c ~ wind_ew + wind_ns) %>%
+ impute_lm(air_temp_c ~ wind_ew + wind_ns) %>%
+ impute_lm(humidity ~ wind_ew + wind_ns) %>%
+ impute_lm(sea_temp_c ~ wind_ew + wind_ns) %>%
add_label_shadow()
We can then show the previously missing (now imputed!) data in a scatterplot with ggplot2 by setting the color
aesthetic in ggplot to any_missing
:
-library(ggplot2)
-ggplot(ocean_imp,
- aes(x = air_temp_c,
+library(ggplot2)
+ggplot(ocean_imp,
+ aes(x = air_temp_c,
y = humidity,
color = any_missing)) +
- geom_point() +
- scale_color_brewer(palette = "Dark2") +
- theme(legend.position = "bottom")
+ geom_point() +
+ scale_color_brewer(palette = "Dark2") +
+ theme(legend.position = "bottom")
Or, if you want to look at one variable, you can look at a density plot of one variable, using fill = any_missing
-ggplot(ocean_imp,
- aes(x = air_temp_c,
+ggplot(ocean_imp,
+ aes(x = air_temp_c,
fill = any_missing)) +
- geom_density(alpha = 0.3) +
- scale_fill_brewer(palette = "Dark2") +
- theme(legend.position = "bottom")
+ geom_density(alpha = 0.3) +
+ scale_fill_brewer(palette = "Dark2") +
+ theme(legend.position = "bottom")
-ggplot(ocean_imp,
- aes(x = humidity,
+ggplot(ocean_imp,
+ aes(x = humidity,
fill = any_missing)) +
- geom_density(alpha = 0.3) +
- scale_fill_brewer(palette = "Dark2") +
- theme(legend.position = "bottom")
+ geom_density(alpha = 0.3) +
+ scale_fill_brewer(palette = "Dark2") +
+ theme(legend.position = "bottom")
We can also compare imputed values to complete cases by grouping by any_missing
, and summarising.
ocean_imp %>%
- group_by(any_missing) %>%
- summarise_at(.vars = vars(air_temp_c),
- .funs = funs(min, mean, median, max, .args = list(na.rm = TRUE)))
-#> # A tibble: 2 x 5
-#> any_missing min mean median max
-#> <chr> <dbl> <dbl> <dbl> <dbl>
-#> 1 Missing 21.4 23.9 24.4 25.2
-#> 2 Not Missing 22.1 25.3 25.8 28.5
+ group_by(any_missing) %>%
+ summarise_at(.vars = vars(air_temp_c),
+ .funs = funs(min, mean, median, max, .args = list(na.rm = TRUE)))
+#> Warning: funs() is soft deprecated as of dplyr 0.8.0
+#> please use list() instead
+#>
+#> # Before:
+#> funs(name = f(.)
+#>
+#> # After:
+#> list(name = ~f(.))
+#> This warning is displayed once per session.
+#> # A tibble: 2 x 5
+#> any_missing min mean median max
+#> <chr> <dbl> <dbl> <dbl> <dbl>
+#> 1 Missing 21.4 23.9 24.4 25.2
+#> 2 Not Missing 22.1 25.3 25.8 28.5
One thing that we notice with our imputations is that they aren’t very good - we can improve upon the imputation by including the variables year, latitude, and longitude:
ocean_imp_yr <- oceanbuoys %>%
bind_shadow() %>%
- impute_lm(air_temp_c ~ wind_ew + wind_ns + year + longitude + latitude) %>%
- impute_lm(humidity ~ wind_ew + wind_ns + year + longitude + latitude) %>%
- impute_lm(sea_temp_c ~ wind_ew + wind_ns + year + longitude + latitude) %>%
+ impute_lm(air_temp_c ~ wind_ew + wind_ns + year + longitude + latitude) %>%
+ impute_lm(humidity ~ wind_ew + wind_ns + year + longitude + latitude) %>%
+ impute_lm(sea_temp_c ~ wind_ew + wind_ns + year + longitude + latitude) %>%
add_label_shadow()
ggplot(ocean_imp_yr,
- aes(x = air_temp_c,
+ggplot(ocean_imp_yr,
+ aes(x = air_temp_c,
y = humidity,
color = any_missing)) +
- geom_point() +
- scale_color_brewer(palette = "Dark2") +
- theme(legend.position = "bottom")
+ geom_point() +
+ scale_color_brewer(palette = "Dark2") +
+ theme(legend.position = "bottom")
We can explore using a single imputation of Hmisc::aregImpute()
, which allows for multiple imputation with bootstrapping, additive regression, and predictive mean matching. We are going to explore predictive mean matching and single imputation.
We can explore using a single imputation of Hmisc::aregImpute()
, which allows for multiple imputation with bootstrapping, additive regression, and predictive mean matching. We are going to explore predictive mean matching and single imputation.
-library(Hmisc)
+library(Hmisc)
#> Loading required package: lattice
#> Loading required package: survival
#> Loading required package: Formula
@@ -337,16 +354,16 @@
aq_nab <- nabular(airquality) %>% add_label_shadow()
# insert imputed values
-aq_nab$Ozone[is.na(aq_nab$Ozone)] <- aq_imp$imputed$Ozone
-aq_nab$Solar.R[is.na(aq_nab$Solar.R)] <- aq_imp$imputed$Solar.R
In the future there will be a more concise way to insert these imputed values into data, but for the moment the method above is what I would recommend for single imputation.
We can then explore the imputed values like so:
-ggplot(aq_nab,
- aes(x = Ozone,
+ggplot(aq_nab,
+ aes(x = Ozone,
y = Solar.R,
colour = any_missing)) +
- geom_point()
vignettes/getting-started-w-naniar.Rmd
getting-started-w-naniar.Rmd
When you start with a dataset, you might do something where you look at the general summary, using functions such as:
summary()
str()
summary()
str()
skimr::skim
, or dplyr::glimpse()
skimr::skim
, or
+dplyr::glimpse()
These work really well when you’ve got a small amount of data, but when you have more data, you are generally limited by how much you can read.
So before you start looking at missing data, you’ll need to look at the data, but what does that even mean?
@@ -147,7 +155,7 @@vis_dat
library(visdat)
+
vis_dat
visualises the whole dataframe at once, and provides information about the class of the data input into R, as well as whether the data is missing or not.
@@ -176,41 +184,41 @@
Day
Typically, when exploring this data, you might want to explore the variables Solar.R and Ozone, and so plot a scatterplot of solar radiation and ozone, doing something like this:
-
## Warning: Removed 42 rows containing missing values (geom_point).
The problem with this is that ggplot does not handle missings by default, and removes the missing values. This makes them hard to explore. It also presents the strange question of “how do you visualise something that is not there?”. One approach to visualising missing data comes from ggobi
and manet
, where we replace “NA” values with values 10% lower than the minimum value in that variable.
This process is performed and visualised for you with the geom_miss_point()
ggplot2 geom. Here, we illustrate by exploring the relationship between Ozone and Solar radiation from the airquality dataset.
-
## Warning: Removed 42 rows containing missing values (geom_point).
-library(naniar)
+library(naniar)
-ggplot(airquality,
- aes(x = Solar.R,
+ggplot(airquality,
+ aes(x = Solar.R,
y = Ozone)) +
geom_miss_point()
Being a proper ggplot geom, it supports all of the standard features of ggplot2, such as facets,
-
And different themes
-ggplot(airquality,
- aes(x = Solar.R,
+ggplot(airquality,
+ aes(x = Solar.R,
y = Ozone)) +
geom_miss_point() +
- facet_wrap(~Month) +
- theme_dark()
+ facet_wrap(~Month) +
+ theme_dark()
@@ -219,9 +227,9 @@
The plots created with the gg_miss
family all have a basic theme, but you can customise them, and add arguments like so:
-
+
-
+
To add facets in these plots, you can use the facet
argument:
@@ -264,12 +272,12 @@
## 8 !NA !NA !NA !NA !NA !NA
## 9 !NA !NA !NA !NA !NA !NA
## 10 NA !NA !NA !NA !NA !NA
-## # ... with 143 more rows
+## # … with 143 more rows
bind_shadow
attaches a shadow to the current dataframe, a format we call “nabular”, a portmanteau of NA
and tabular
. You can also use nabular
to do the same thing:
+library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
@@ -278,45 +286,45 @@
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
-
+
## Observations: 153
## Variables: 12
-## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 1...
-## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 2...
-## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, ...
-## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66,...
-## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
-## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
-## $ Ozone_NA <fct> !NA, !NA, !NA, !NA, NA, !NA, !NA, !NA, !NA, NA, !NA...
-## $ Solar.R_NA <fct> !NA, !NA, !NA, !NA, NA, NA, !NA, !NA, !NA, !NA, NA,...
-## $ Wind_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Temp_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Month_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Day_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-
+## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14,…
+## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256…
+## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.…
+## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 6…
+## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
+## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
+## $ Ozone_NA <fct> !NA, !NA, !NA, !NA, NA, !NA, !NA, !NA, !NA, NA, !NA, …
+## $ Solar.R_NA <fct> !NA, !NA, !NA, !NA, NA, NA, !NA, !NA, !NA, !NA, NA, !…
+## $ Wind_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Temp_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Month_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Day_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+
## Observations: 153
## Variables: 12
-## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 1...
-## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 2...
-## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, ...
-## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66,...
-## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
-## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
-## $ Ozone_NA <fct> !NA, !NA, !NA, !NA, NA, !NA, !NA, !NA, !NA, NA, !NA...
-## $ Solar.R_NA <fct> !NA, !NA, !NA, !NA, NA, NA, !NA, !NA, !NA, !NA, NA,...
-## $ Wind_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Temp_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Month_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-## $ Day_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !...
-
+## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14,…
+## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256…
+## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.…
+## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 6…
+## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
+## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
+## $ Ozone_NA <fct> !NA, !NA, !NA, !NA, NA, !NA, !NA, !NA, !NA, NA, !NA, …
+## $ Solar.R_NA <fct> !NA, !NA, !NA, !NA, NA, NA, !NA, !NA, !NA, !NA, NA, !…
+## $ Wind_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Temp_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Month_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+## $ Day_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA…
+
## [1] TRUE
Doing this provides a consistent syntax for referring to variables with missing values.
Nabular data provides a useful pattern to explore missing values, grouping by the missing/complete of one variable and looking at the mean and other summary values. Below we show the mean, sd, variance, and min and max values of Solar.R for when Ozone is present, and when it is missing.
airquality %>%
bind_shadow() %>%
- group_by(Ozone_NA) %>%
- summarise_at(.vars = "Solar.R",
- .funs = c("mean", "sd", "var", "min", "max"),
+ group_by(Ozone_NA) %>%
+ summarise_at(.vars = "Solar.R",
+ .funs = c("mean", "sd", "var", "min", "max"),
na.rm = TRUE)
## # A tibble: 2 x 6
## Ozone_NA mean sd var min max
@@ -324,10 +332,10 @@
## 1 !NA 185. 91.2 8309. 7 334
## 2 NA 190. 87.7 7690. 31 332
Below, we can plot the distribution of Temperature, plotting for values of temperature when Ozone is missing, and when it is not missing.
-
Binding the shadow here also has great benefits when combined with imputation.
@@ -335,36 +343,36 @@
Visualising imputed values
With the easy-to-use simputation
package, we impute values for Ozone, then visualise the data:
-
+
##
## Attaching package: 'simputation'
## The following object is masked from 'package:naniar':
##
## impute_median
-library(dplyr)
+library(dplyr)
airquality %>%
- impute_lm(Ozone ~ Temp + Wind) %>%
- ggplot(aes(x = Temp,
+ impute_lm(Ozone ~ Temp + Wind) %>%
+ ggplot(aes(x = Temp,
y = Ozone)) +
- geom_point()
+ geom_point()
Note that we no longer get any warnings regarding missing observations - because they are all imputed! But this comes at a cost: we also no longer have information about where the imputations are - they are now sort of invisible.
Using the shadow matrix to keep track of where the missings are, you can actually keep track of the imputations, by colouring by what was previously missing in Ozone.
aq_shadow %>%
- impute_lm(Ozone ~ Temp + Wind) %>%
- ggplot(aes(x = Temp,
+ impute_lm(Ozone ~ Temp + Wind) %>%
+ ggplot(aes(x = Temp,
y = Ozone,
colour = Ozone_NA)) +
- geom_point()
+ geom_point()
naniar
also provides numerical summaries for missing data. Two convenient counters of complete values and missings are n_miss()
and n_complete()
. These work on both dataframes and vectors, similar to dplyr::n_distinct()
naniar
also provides numerical summaries for missing data. Two convenient counters of complete values and missings are n_miss()
and n_complete()
. These work on both dataframes and vectors, similar to dplyr::n_distinct()
## [1] 153
-
+
## [1] 68
## [1] 44
@@ -405,7 +413,7 @@ miss_case_table()
tabulates the number of missing values in a case / row. Below, this shows the number of missings in a case:
miss_var_span()
is used to determine the number of missings over a specified repeating span of rows in a variable of a dataframe. Similar to miss_var_run()
, you specify the variable that you wish to explore, you then also specify the size of the span with the span_every
argument.
miss_var_span(pedestrian,
hourly_counts,
@@ -499,13 +507,13 @@
## 8 8 0 100 0 1
## 9 9 0 100 0 1
## 10 10 0 100 0 1
-## # ... with 367 more rows
+## # … with 367 more rows
group_by()
with naniarEvery miss_*
summary function that returns a dataframe can be used with dplyr group_by()
. For example, if you would like to look at the number of missing values for all variables of pedestrian data.
group_by()
with naniar
+Every miss_*
summary function that returns a dataframe can be used with dplyr group_by()
. For example, if you would like to look at the number of missing values for all variables of pedestrian data.
## # A tibble: 9 x 3
## variable n_miss pct_miss
@@ -521,9 +529,9 @@
## 9 sensor_name 0 0
We see that this is in hourly_counts
. We can then explore this by month, and filter by the variable being hourly_counts
, since it is the only one with missing values.
pedestrian %>%
- group_by(month) %>%
+ group_by(month) %>%
miss_var_summary() %>%
- filter(variable == "hourly_counts")
## # A tibble: 12 x 4
## month variable n_miss pct_miss
## <ord> <chr> <int> <dbl>
@@ -551,7 +559,7 @@
Here, the approach is to predict the proportion of missingness in a given case, using all variables. There is a little helper function to add a column with the proportion of cases or rows missing - add_prop_miss()
. This creates a column named “prop_miss_all”, which is the proportion of missing values in that row.
+ head()
## Ozone Solar.R Wind Temp Month Day prop_miss_all
## 1 41 190 7.4 67 5 1 0.0000000
## 2 36 118 8.0 72 5 2 0.0000000
@@ -560,13 +568,13 @@
## 5 NA NA 14.3 56 5 5 0.3333333
## 6 28 NA 14.9 66 5 6 0.1666667
We can then use a model like decision trees to predict which variables and their values are important for predicting the proportion of missingness:
-library(rpart)
-library(rpart.plot)
+library(rpart)
+library(rpart.plot)
airquality %>%
add_prop_miss() %>%
- rpart(prop_miss_all ~ ., data = .) %>%
- prp(type = 4, extra = 101, prefix = "Prop. Miss = ")
+ rpart(prop_miss_all ~ ., data = .) %>%
+ prp(type = 4, extra = 101, prefix = "Prop. Miss = ")
## Warning: Cannot retrieve the data used to build the model (so cannot determine roundint and is.binary for the variables).
## To silence this warning:
## Call prp with roundint=FALSE,
@@ -633,7 +641,7 @@
Numerical summaries of missing values
- Using group_by()
with naniar
+ Using group_by()
with naniar
Modelling missingness
Summary
Future development
@@ -651,14 +659,13 @@
vignettes/naniar-visualisation.Rmd
naniar-visualisation.Rmd
One of the first plots that I recommend you start with when you are first exploring your missing data, is the vis_miss()
plot, which is re-exported from visdat
.
library(naniar)
+
@@ -131,7 +139,7 @@
We can explore this with more complex data, such as riskfactors:
-The default option of gg_miss_upset
is taken from UpSetR::upset
- which is to use up to 5 sets and up to 40 interactions. Here, setting nsets = 5
means to look at 5 variables and their combinations. The number of combinations or rather intersections
is controlled by nintersects
. You could, for example look at all of the number of missing variables using n_var_miss
:
+The default option of gg_miss_upset
is taken from UpSetR::upset
- which is to use up to 5 sets and up to 40 interactions. Here, setting nsets = 5
means to look at 5 variables and their combinations. The number of combinations or rather intersections
is controlled by nintersects
. You could, for example look at all of the number of missing variables using n_var_miss
:
## [1] 24
@@ -158,35 +166,35 @@
geom_miss_point()
-library(ggplot2)
+library(ggplot2)
# using regular geom_point()
-ggplot(airquality,
- aes(x = Ozone,
+ggplot(airquality,
+ aes(x = Ozone,
y = Solar.R)) +
-geom_point()
+geom_point()
## Warning: Removed 42 rows containing missing values (geom_point).
-library(naniar)
+library(naniar)
# using geom_miss_point()
-ggplot(airquality,
- aes(x = Ozone,
+ggplot(airquality,
+ aes(x = Ozone,
y = Solar.R)) +
geom_miss_point()
# Facets!
-ggplot(airquality,
- aes(x = Ozone,
+ggplot(airquality,
+ aes(x = Ozone,
y = Solar.R)) +
geom_miss_point() +
- facet_wrap(~Month)
+ facet_wrap(~Month)
# Themes
-ggplot(airquality,
- aes(x = Ozone,
+ggplot(airquality,
+ aes(x = Ozone,
y = Solar.R)) +
geom_miss_point() +
- theme_dark()
+ theme_dark()
This plot shows the number of missing values in each variable in a dataset. It is powered by the miss_var_summary()
function.
If you wish, you can also change whether to show the % of missing instead with show_pct = TRUE
.
This plot shows the number of missing values in each case. It is powered by the miss_case_summary()
function.
You can also order by the number of cases using order_cases = TRUE
gg_miss_fct()
This plot shows the number of missings in each column, broken down by a categorical variable from the dataset. It is powered by a dplyr::group_by
statement followed by miss_var_summary()
.
This plot shows the number of missings in each column, broken down by a categorical variable from the dataset. It is powered by a dplyr::group_by
statement followed by miss_var_summary()
.
## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
+
+## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
-library(ggplot2)
-gg_miss_fct(x = riskfactors, fct = marital) + labs(title = "NA in Risk Factors and Marital status")
library(ggplot2)
+gg_miss_fct(x = riskfactors, fct = marital) + labs(title = "NA in Risk Factors and Marital status")
## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
+
+## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
-
+
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
@@ -247,9 +265,14 @@
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
-
+
+## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
+
+## Warning: Factor `marital` contains implicit NA, consider using
+## `forcats::fct_explicit_na`
## # A tibble: 231 x 4
## marital variable n_miss pct_miss
## <fct> <chr> <int> <dbl>
@@ -263,16 +286,16 @@
## 8 Married weight_lbs 6 4.58
## 9 Married bmi 6 4.58
## 10 Married diet_fruit 4 3.05
-## # ... with 221 more rows
+## # … with 221 more rows
gg_miss_span()
This plot shows the number of missings in a given span, or breaksize, for a single selected variable. In this case we look at the span of hourly_counts
from the pedestrian dataset. It is powered by the miss_var_span
function
## # A tibble: 13 x 5
## span_counter n_miss n_complete prop_miss prop_complete
## <int> <int> <dbl> <dbl> <dbl>
@@ -289,18 +312,18 @@
## 11 11 0 3000 0 1
## 12 12 745 2255 0.248 0.752
## 13 13 432 2568 0.144 0.856
-
+
-# works with the rest of ggplot
-gg_miss_span(pedestrian, hourly_counts, span_every = 3000) + labs(x = "custom")
# works with the rest of ggplot
+gg_miss_span(pedestrian, hourly_counts, span_every = 3000) + labs(x = "custom")
You can also explore miss_var_span
by group with the facet
argument.
gg_miss_case_cumsum()
This plot shows the cumulative sum of missing values, reading the rows of the dataset from the top to bottom. It is powered by the miss_case_cumsum()
function.
gg_miss_var_cumsum()
This plot shows the cumulative sum of missing values, reading columns from the left to the right of your dataframe. It is powered by the miss_var_cumsum()
function.
gg_miss_which()
This plot shows a set of rectangles that indicate whether there is a missing element in a column or not.
- +vignettes/replace-with-na.Rmd
replace-with-na.Rmd
First, we introduce a small fictional dataset, df
, which contains some common features of a dataset with the sorts of missing values we might encounter. This includes multiple specifications of missing values, such as “N/A”, “N A”, and “Not Available”. And also some common numeric codes, like -98, -99, and -1.
-df <- tibble::tribble(
+df <- tibble::tribble(
~name, ~x, ~y, ~z,
"N/A", 1, "N/A", -100,
"N A", 3, "NOt available", -99,
@@ -132,9 +140,9 @@
What if we want to replace the value -99 in the x
column with a missing value?
First, let’s load naniar
:
-
+
Now, we specify the fact that we want to replace -99 with a missing value. To do so we use the replace
argument, and specify a named list, which contains the names of the variable and the value it would take to replace with NA
.
-df %>% replace_with_na(replace = list(x = -99))
+df %>% replace_with_na(replace = list(x = -99))
#> # A tibble: 5 x 4
#> name x y z
#> <chr> <dbl> <chr> <dbl>
@@ -145,7 +153,7 @@
#> 5 John Smith -98 28 -1
And say we want to replace -98 as well?
df %>%
- replace_with_na(replace = list(x = c(-99, -98)))
+ replace_with_na(replace = list(x = c(-99, -98)))
#> # A tibble: 5 x 4
#> name x y z
#> <chr> <dbl> <chr> <dbl>
@@ -156,8 +164,8 @@
#> 5 John Smith NA 28 -1
And then what if we want to replace -99 and -98 in all the numeric columns, x and z?
df %>%
- replace_with_na(replace = list(x = c(-99,-98),
- z = c(-99, -98)))
+ replace_with_na(replace = list(x = c(-99,-98),
+ z = c(-99, -98)))
#> # A tibble: 5 x 4
#> name x y z
#> <chr> <dbl> <chr> <dbl>
@@ -203,7 +211,7 @@
Likewise, if you have a set of (annoying) repeating strings like various spellings of “NA”, then I suggest you first lay out all the offending cases:
# write out all the offending strings
-na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "Not Available", "NOt available")
+na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "Not Available", "NOt available")
Then you write ~.x %in% na_strings
- which reads as “does this value occur in the list of NA strings”.
df %>%
@@ -246,7 +254,7 @@
This is similar to _all
, but instead in this case you can specify the variables that you want affected by the rule that you state. This is useful in cases where you want to specify a rule that only affects a selected number of variables.
df %>%
- replace_with_na_at(.vars = c("x","z"),
+ replace_with_na_at(.vars = c("x","z"),
condition = ~.x == -99)
#> # A tibble: 5 x 4
#> name x y z
@@ -259,22 +267,22 @@
Although you can achieve this with regular replace_with_na()
, it is more concise to use, replace_with_na_at()
. Additionally, you can specify rules as functions - for example, make a value NA if the exponential of that number is less than 1:
df %>%
- replace_with_na_at(.vars = c("x","z"),
- condition = ~ exp(.x) < 1)
+ replace_with_na_at(.vars = c("x","z"),
+ condition = ~ exp(.x) < 1)
#> # A tibble: 5 x 4
-#> name x y z
-#> <chr> <dbl> <chr> <lgl>
-#> 1 N/A 1 N/A NA
-#> 2 N A 3 NOt available NA
-#> 3 N / A NA 29 NA
-#> 4 Not Available NA 25 NA
-#> 5 John Smith NA 28 NA
+#> name x y z
+#> <chr> <dbl> <chr> <dbl>
+#> 1 N/A 1 N/A NA
+#> 2 N A 3 NOt available NA
+#> 3 N / A NA 29 NA
+#> 4 Not Available NA 25 NA
+#> 5 John Smith NA 28 NA
replace_with_na_if()
-There may be some cases where you can identify variables based on some test - is.character()
- are they character variables? is.numeric()
- Are they numeric or double? and a given value inside that type of data. For example,
+There may be some cases where you can identify variables based on some test - is.character()
- are they character variables? is.numeric()
- Are they numeric or double? and a given value inside that type of data. For example,
df %>%
replace_with_na_if(.predicate = is.character,
@@ -308,7 +316,7 @@
Notes on alternative ways to handle replacing with NAs
There are some alternative ways to handle replacing values with NA in the tidyverse, na_if
and using readr
. These are ultimately not as expressive as the replace_with_na()
functions, but they are very useful if you only have one kind of value to replace with a missing, and if you know what the missing values are upon reading in the data.
-
+
This function allows you to replace exact values - similar to replace_with_na()
, but for all columns in a data frame. Here is how you would use it in our examples.
# instead of:
@@ -323,7 +331,7 @@
#> 4 Not Available NA 25 -101
#> 5 John Smith -98 28 -1
-df_2 <- df %>% dplyr::na_if(-99)
+df_2 <- df %>% dplyr::na_if(-99)
df_2
#> # A tibble: 5 x 4
#> name x y z
@@ -335,15 +343,15 @@
#> 5 John Smith -98 28 -1
# are they the same?
-all.equal(df_1, df_2)
+all.equal(df_1, df_2)
#> [1] TRUE
Note, however, that na_if()
can only take arguments of length one. This means that it cannot capture other statements like
-na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "Not Available", "NOt available")
+na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "Not Available", "NOt available")
df_3 <- df %>% replace_with_na_all(condition = ~.x %in% na_strings)
# Not run:
-df_4 <- df %>% dplyr::na_if(x = ., y = na_strings)
+df_4 <- df %>% dplyr::na_if(x = ., y = na_strings)
# Error in check_length(y, x, fmt_args("y"), glue("same as {fmt_args(~x)}")) :
# argument "y" is missing, with no default
It also cannot handle more complex equations, where you want to refer to values in other columns, or values less than or greater than another value.
@@ -381,14 +389,13 @@
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
@@ -24,14 +31,15 @@
-
+
+ Toggle navigation
naniar
- 0.4.1
+ 0.4.2
@@ -96,7 +104,7 @@
Special Missing Values
Nicholas Tierney
- 2018-11-20
+ 2019-02-15
Source: vignettes/special-missing-values.Rmd
special-missing-values.Rmd
@@ -113,7 +121,7 @@ 2018-11-20
Terminology
Missing data can be represented as a binary matrix of “missing” or “not missing”, which in naniar
we call a “shadow matrix”, a term borrowed from Swayne and Buja, 1998.
-library(naniar)
+library(naniar)
as_shadow(oceanbuoys)
#> # A tibble: 736 x 8
#> year_NA latitude_NA longitude_NA sea_temp_c_NA air_temp_c_NA humidity_NA
@@ -128,7 +136,7 @@
#> 8 !NA !NA !NA !NA !NA !NA
#> 9 !NA !NA !NA !NA !NA !NA
#> 10 !NA !NA !NA !NA !NA !NA
-#> # ... with 726 more rows, and 2 more variables: wind_ew_NA <fct>,
+#> # … with 726 more rows, and 2 more variables: wind_ew_NA <fct>,
#> # wind_ns_NA <fct>
The shadow matrix
has three key features to facilitate analysis
@@ -151,7 +159,7 @@
#> 8 1997 0 -110 28.0 27.1 78.3 -3.70 4.5
#> 9 1997 0 -110 28.0 27.2 78.6 -4.20 5
#> 10 1997 0 -110 28.0 27.2 76.9 -3.60 3.5
-#> # ... with 726 more rows, and 8 more variables: year_NA <fct>,
+#> # … with 726 more rows, and 8 more variables: year_NA <fct>,
#> # latitude_NA <fct>, longitude_NA <fct>, sea_temp_c_NA <fct>,
#> # air_temp_c_NA <fct>, humidity_NA <fct>, wind_ew_NA <fct>,
#> # wind_ns_NA <fct>
@@ -161,7 +169,7 @@
Recoding missing values
To demonstrate recoding of missing values, we use a toy dataset, df
:
-df <- tibble::tribble(
+df <- tibble::tribble(
~wind, ~temp,
-99, 45,
68, NA,
@@ -189,11 +197,11 @@
Special types of missingness are encoded in the shadow part of nabular data. Using the recode_shadow
function, we can recode the missing values like so:
-This reads as “recode shadow for wind where wind is equal to -99, and give it the label ‘broken_machine’”. The .where
function is used to help make our intent clearer, and reads very much like the dplyr::case_when()
function, but takes care of encoding extra factor levels into the missing data.
+This reads as “recode shadow for wind where wind is equal to -99, and give it the label ‘broken_machine’”. The .where
function is used to help make our intent clearer, and reads very much like the dplyr::case_when()
function, but takes care of encoding extra factor levels into the missing data.
The extra types of missingness are recoded in the shadow part of the nabular data as additional factor levels:
-levels(dfs_recode$wind_NA)
+levels(dfs_recode$wind_NA)
#> [1] "!NA" "NA" "NA_broken_machine"
-levels(dfs_recode$temp_NA)
+levels(dfs_recode$temp_NA)
#> [1] "!NA" "NA" "NA_broken_machine"
All additional types of missingness are recorded across all shadow variables, even if those variables don’t contain that special missing value. This ensures all flavours of missingness are known.
To summarise, to use recode_shadow
, the user provides the following information:
@@ -232,14 +240,13 @@
-
-
+
-
+
-
+
-
+
-
+
@@ -31,17 +38,21 @@
-
+
+
+
+
-
+
+
+
Data Structures, Summaries, and Visualisations for Missing Data • naniar
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
@@ -28,14 +35,15 @@
-
+
+ Toggle navigation
naniar
- 0.4.1
+ 0.4.2
@@ -103,7 +111,7 @@
naniar
provides principled, tidy ways to summarise, visualise, and manipulate missing data with minimal deviations from the workflows in ggplot2 and tidy data. It does this by providing:
@@ -151,43 +159,43 @@
Installation
You can install naniar from CRAN:
-
+
Or you can install the development version on github using remotes
:
+remotes::install_github("njtierney/naniar")
A short overview of naniar
Visualising missing data might sound a little strange - how do you visualise something that is not there? One approach to visualising missing data comes from ggobi and manet, which replace NA
values with values 10% lower than the minimum value in that variable. This visualisation is provided with the geom_miss_point()
ggplot2 geom - which we illustrate by exploring the relationship between Ozone and Solar radiation from the airquality dataset.
-library(ggplot2)
+library(ggplot2)
-ggplot(data = airquality,
- aes(x = Ozone,
+ggplot(data = airquality,
+ aes(x = Ozone,
y = Solar.R)) +
- geom_point()
+ geom_point()
#> Warning: Removed 42 rows containing missing values (geom_point).
ggplot2 does not handle these missing values, and we get a warning message about the missing values.
We can instead use geom_miss_point()
to display the missing data
-library(naniar)
+library(naniar)
-ggplot(data = airquality,
- aes(x = Ozone,
+ggplot(data = airquality,
+ aes(x = Ozone,
y = Solar.R)) +
geom_miss_point()
geom_miss_point()
has shifted the missing values to now be 10% below the minimum value. The missing values are a different colour so that missingness becomes pre-attentive. As it is a ggplot2 geom, it supports features like faceting and other ggplot features.
p1 <-
-ggplot(data = airquality,
- aes(x = Ozone,
+ggplot(data = airquality,
+ aes(x = Ozone,
y = Solar.R)) +
geom_miss_point() +
- facet_wrap(~Month, ncol = 2) +
- theme(legend.position = "bottom")
+ facet_wrap(~Month, ncol = 2) +
+ theme(legend.position = "bottom")
p1
@@ -197,7 +205,7 @@
Data Structures
naniar provides a data structure for working with missing data, the shadow matrix (Swayne and Buja, 1998). The shadow matrix is the same dimension as the data, and consists of binary indicators of missingness of data values, where missing is represented as “NA”, and not missing is represented as “!NA”, and variable names are kept the same, with the added suffix “_NA” to the variables.
-head(airquality)
+head(airquality)
#> Ozone Solar.R Wind Temp Month Day
#> 1 41 190 7.4 67 5 1
#> 2 36 118 8.0 72 5 2
@@ -220,7 +228,7 @@
#> 8 !NA !NA !NA !NA !NA !NA
#> 9 !NA !NA !NA !NA !NA !NA
#> 10 NA !NA !NA !NA !NA !NA
-#> # ... with 143 more rows
+#> # … with 143 more rows
Binding the shadow data to the data helps you keep better track of the missing values. This format is called “nabular”, a portmanteau of NA
and tabular
. You can bind the shadow to the data using bind_shadow
or nabular
:
bind_shadow(airquality)
#> # A tibble: 153 x 12
@@ -236,7 +244,7 @@
#> 8 19 99 13.8 59 5 8 !NA !NA !NA
#> 9 8 19 20.1 61 5 9 !NA !NA !NA
#> 10 NA 194 8.6 69 5 10 NA !NA !NA
-#> # ... with 143 more rows, and 3 more variables: Temp_NA <fct>,
+#> # … with 143 more rows, and 3 more variables: Temp_NA <fct>,
#> # Month_NA <fct>, Day_NA <fct>
nabular(airquality)
#> # A tibble: 153 x 12
@@ -252,25 +260,25 @@
#> 8 19 99 13.8 59 5 8 !NA !NA !NA
#> 9 8 19 20.1 61 5 9 !NA !NA !NA
#> 10 NA 194 8.6 69 5 10 NA !NA !NA
-#> # ... with 143 more rows, and 3 more variables: Temp_NA <fct>,
+#> # … with 143 more rows, and 3 more variables: Temp_NA <fct>,
#> # Month_NA <fct>, Day_NA <fct>
Using the nabular format helps you manage where missing values are in your dataset and makes it easy to do visualisations where you split by missingness:
airquality %>%
bind_shadow() %>%
- ggplot(aes(x = Temp,
+ ggplot(aes(x = Temp,
fill = Ozone_NA)) +
- geom_density(alpha = 0.5)
+ geom_density(alpha = 0.5)
And even visualise imputations
airquality %>%
bind_shadow() %>%
- simputation::impute_lm(Ozone ~ Temp + Solar.R) %>%
- ggplot(aes(x = Solar.R,
+ simputation::impute_lm(Ozone ~ Temp + Solar.R) %>%
+ ggplot(aes(x = Solar.R,
y = Ozone,
colour = Ozone_NA)) +
- geom_point()
+ geom_point()
#> Warning: Removed 7 rows containing missing values (geom_point).
Or perform an upset plot - to plot the combinations of missingness across cases, using the gg_miss_upset
function
@@ -305,7 +313,7 @@
Numerical summaries for missing data
-naniar provides numerical summaries of missing data, that follow a consistent rule that uses a syntax begining with miss_
. Summaries focussing on variables or a single selected variable, start with miss_var_
, and summaries for cases (the initial collected row order of the data), they start with miss_case_
. All of these functions that return dataframes also work with dplyr’s group_by()
.
+naniar provides numerical summaries of missing data, that follow a consistent rule that uses a syntax beginning with miss_
. Summaries focussing on variables or a single selected variable, start with miss_var_
, and summaries for cases (the initial collected row order of the data) start with miss_case_
. All of these functions that return dataframes also work with dplyr’s group_by()
.
For example, we can look at the number and percent of missings in each case and variable with miss_var_summary()
, and miss_case_summary()
, which both return output ordered by the number of missing values.
miss_var_summary(airquality)
@@ -332,10 +340,10 @@
#> 8 32 1 16.7
#> 9 33 1 16.7
#> 10 34 1 16.7
-#> # ... with 143 more rows
-You could also group_by()
to work out the number of missings in each variable across the levels within it.
+#> # … with 143 more rows
+
You could also group_by()
to work out the number of missings in each variable across the levels within it.
-library(dplyr)
+library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
@@ -345,7 +353,7 @@
#>
#> intersect, setdiff, setequal, union
airquality %>%
- group_by(Month) %>%
+ group_by(Month) %>%
miss_var_summary()
#> # A tibble: 25 x 4
#> Month variable n_miss pct_miss
@@ -360,7 +368,7 @@
#> 8 6 Wind 0 0
#> 9 6 Temp 0 0
#> 10 6 Day 0 0
-#> # ... with 15 more rows
+#> # … with 15 more rows
You can read more about all of these functions in the vignette “Getting Started with naniar”.
@@ -420,10 +428,10 @@ License
Developers
-- Nicholas Tierney
Author, maintainer
-- Di Cook
Author
-- Miles McBain
Author
-- Colin Fay
Author
+- Nicholas Tierney
Author, maintainer
+- Di Cook
Author
+- Miles McBain
Author
+- Colin Fay
Author
- All authors...
@@ -449,14 +457,13 @@ Dev status
-
-
+
-
+
-
+
-
+
-
+
@@ -31,17 +38,21 @@
-
+
+
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Changelog • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,17 +38,21 @@
-
+
+
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
NA • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,17 +38,21 @@
-
+
+
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a column describing presence of any missing values — add_any_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -45,12 +52,15 @@
will be called "any_miss_all", if no variables are specified, otherwise,
if variables are specified, the label will be "any_miss_vars" to indicate
that not all variables have been used to create the labels." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a column describing if there are any missings in the dataset — add_label_missings • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a column describing whether there is a shadow — add_label_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
focus on whether there have been any shadows created. This can be useful
when data has been imputed and you need to determine which rows contained
missing values when the shadow was bound to the dataset." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a column that tells us which "missingness cluster" a row belongs to — add_miss_cluster • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
see some clustering in the data, but you do not have a way to identify
the cluster. Future work will incorporate the seriation package to
allow for better control over the clustering from the user." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add column containing number of missing data values — add_n_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add column containing proportion of missing data values — add_prop_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
"prop_miss", which contains the proportion of missing values in that row.
You can specify the variables that you would like to show the missingness
for." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a shadow column to dataframe — add_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a shadow shifted column to a dataset — add_shadow_shift • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a counter variable for a span of dataframe — add_span_counter • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Identify if all values are missing or complete — all-is-miss-complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Helper function to determine whether all rows are complete — all_row_complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Helper function to determine whether all rows are missing — all_row_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Identify if there are any missing or complete values — any-na • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
Are two functions that do this in naniar - any_miss and its alias
any_na. These both call anyNA under the hood. any_complete is
the complement to any_miss - it returns TRUE if there are any complete values." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Helper function to determine whether there are any missings — any_row_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create shadow data — as_shadow.data.frame • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create shadows — as_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -44,12 +51,15 @@
of missingness of data values, where missing is represented as "NA", and not
missing is represented as "!NA". Although these may be represented as 1 and
0, respectively." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Convert data into shadow format for doing an upset plot — as_shadow_upset • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Bind a shadow dataframe to original data — bind_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a shadow column to a dataset — cast_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -46,12 +53,15 @@
block for the functions cast_shadow_shift, and cast_shadow_shift_label.
It also respects the dplyr verbs starts_with, contains, ends_with, etc.
to select variables." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a shadow and a shadow_shift column to a dataset — cast_shadow_shift • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add a shadow column and a shadow shifted column to a dataset — cast_shadow_shift_label • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Common number values for NA — common_na_numbers • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -45,12 +52,15 @@
possible missings, but I strongly warn against using this to replace NA
values without very carefully looking at the incidence for each of the
cases. Common NA strings are in the data object common_na_strings." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Common string values for NA — common_na_strings • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -48,12 +55,15 @@
uses \ around the "?", "." and "*" characters to protect against using
their wildcard features in grep. Common NA numbers are in the data object
common_na_numbers." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Key drawing functions — draw_key • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Long form representation of a shadow matrix — gather_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
geom_miss_point — geom_miss_point • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
values in ggplot2. To do so it uses methods from ggobi to display missing
data points 10% below the minimum value, so that the values can be seen on
the same axis." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot the number of missings per case (row) — gg_miss_case • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot of cumulative sum of missing for cases — gg_miss_case_cumsum • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot the number of missings for each variable, broken down by a factor — gg_miss_fct • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot the number of missings in a given repeating span — gg_miss_span • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
imputeTS::plotNA.distributionBar(tsNH4, breaksize = 100), which shows the
number of missings in a given span, or breaksize. A default minimal theme
is used, which can be customised as normal for ggplot." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot the pattern of missingness using an upset plot. — gg_miss_upset • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot the number of missings for each variable — gg_miss_var • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
number of missings in each variable, ordered to show which variables have
the most missing data. A default minimal theme is used, which can be
customised as normal for ggplot." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot of cumulative sum of missing value for each variable — gg_miss_var_cumsum • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Plot which variables contain a missing value — gg_miss_which • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Group By Helper — group_by_fun • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Impute data with values shifted 10% below range. — impute_below • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -44,12 +51,15 @@
values adds a new string or label. It is powered by shadow_shift, so
please see the documentation for shadow_shift() for full details on the
different implementations." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Impute data with values shifted 10% below range. — impute_below_all • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
the data. impute_below_all imputes all variables with missings to have
values 10% below the range for numeric values, and for character or factor
values adds a new string or label." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Scoped variants of impute_below
— impute_below_at • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
that satisfy a specific condition, use the scoped variants,
impute_below_at, and impute_below_if. To use _at effectively,
you must know that _at affects variables selected with a character vector, or with vars()." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Scoped variants of impute_below
— impute_below_if • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Impute the mean value into a vector with missing values — impute_mean • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Impute the median value into a vector with missing values — impute_median • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Function reference • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,17 +38,21 @@
-
+
+
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Detect if this is a shade — is_shade • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Test if input is or are shadow variables — is_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Label a missing from one column — label_miss_1d • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
label_miss_2d — label_miss_2d • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Is there a missing value in the row of a dataframe? — label_missings • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Label shadow values as missing or not missing — label_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Percentage of cases that contain a missing or complete values. — miss-complete-case-pct • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Proportion of cases that contain a missing or complete values. — miss-complete-case-prop • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Percentage of variables containing missings or complete values — miss-complete-var-pct • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Proportion of variables containing missings or complete values — miss-complete-var-prop • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Summarise the missingness in each case — miss_case_summary • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Tabulate missings in cases. — miss_case_table • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Proportions of missings in data, variables, and cases. — miss_prop_summary • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
Specifically, returning how many elements in a dataframe contain a missing
value, how many elements in a variable contain a missing value, and how many
elements in a case contain a missing." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Search and present different kinds of missing values — miss_scan_count • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -44,12 +51,15 @@
if they are there, and if so, where they are. miss_scan_count makes it
easier for users to search for particular occurrences of these values
across their variables." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Collate summary measures from naniar into one tibble — miss_summary • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Find the number of missing and complete values in a single run — miss_var_run • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
The function, miss_var_run(), returns a dataframe with the column names
"run_length" and "is_na", which describe the length of the run, and
whether that run describes a missing value." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Summarise the number of missings for a given repeating span on a variable — miss_var_span • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
miss_var_span takes a data.frame object, a variable, and a span_every
argument and returns a dataframe containing the number of missing values
within each span." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Summarise the missingness in each variable — miss_var_summary • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Tabulate the missings in the variables — miss_var_table • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Which variables contain missing values? — miss_var_which • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
of the variables that contain missing values. miss_var_which returns a
vector of variable names that contain missings. It will return NULL when
there are no missings." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
The number of variables with complete values — n-var-case-complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
The number of variables or cases with missing values — n-var-case-miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the number of complete values — n_complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return a vector of the number of complete values in each row — n_complete_row • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the number of missing values — n_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return a vector of the number of missing values in each row — n_miss_row • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Convert data into nabular form by binding shade to it — nabular • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
naniar-ggproto — GeomMissPoint • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
naniar — naniar • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create a new nabular format — new_nabular • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create a new shade factor — new_shade • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create a new shadow — new_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
West Pacific Tropical Atmosphere Ocean Data, 1993 & 1997. — oceanbuoys • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
and prediction of El Niño and La Niña. The data is collected by the
Tropical Atmosphere Ocean project
(http://www.pmel.noaa.gov/tao/index.shtml)." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Percentage of cases that contain a missing or complete values. — pct-miss-complete-case • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Percentage of variables containing missings or complete values — pct-miss-complete-var • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the percent of complete values — pct_complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the percent of missing values — pct_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Pedestrian count information around Melbourne for 2016 — pedestrian • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
Spencer St-Collins St (south), recorded from January 1st 2016 at 00:00:00
to December 31st 2016 at 23:00:00. The data is made free and publicly
available from https://data.melbourne.vic.gov.au/Transport-Movement/Pedestrian-volume-updated-monthly-/b2ak-trbp" />
+
+
-
+
+
+
+
+
+
+
+
+
+Plotly helpers (Convert a geom to a "basic" geom.) — plotly_helpers • naniar
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Toggle navigation
+
+
+
+
+
+ naniar
+ 0.4.2
+
+
+
+
+
+ -
+ Getting Started
+
+-
+ Gallery
+
+-
+
+ Articles
+
+
+
+
+ -
+ Replace with NA
+
+ -
+ Special Missings
+
+ -
+ Exploring Imputations
+
+
+
+-
+ Reference
+
+-
+ News
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Plotly helpers (Convert a geom to a "basic" geom.)
+ Source: R/geom2plotly.R
+ plotly_helpers.Rd
+
+
+
+
+ Helper functions to make it easier to automatically create
+plotly charts. This function makes it possible to convert ggplot2 geoms
+that are not included with ggplot2 itself. Users shouldn't need to use this
+function. It exists purely to allow other package authors to write their
+own conversion method(s).
+
+
+
+ to_basic.GeomMissPoint(data, prestats_data, layout, params, p, ...)
+
+ Arguments
+
+
+
+ data
+ the data returned by ggplot2::ggplot_build()
.
+
+
+ prestats_data
+ the data before statistics are computed.
+
+
+ layout
+ the panel layout.
+
+
+ params
+ parameters for the geom, statistic, and 'constant' aesthetics
+
+
+ p
+ a ggplot2 object (the conversion may depend on scales, for
+instance).
+
+
+ ...
+ currently ignored
+
+
+
+
+
+
+ Contents
+
+ - Arguments
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/prop-miss-complete-case.html b/docs/reference/prop-miss-complete-case.html
index cd1afa3f..a028a31b 100644
--- a/docs/reference/prop-miss-complete-case.html
+++ b/docs/reference/prop-miss-complete-case.html
@@ -1,6 +1,6 @@
-
+
@@ -8,18 +8,25 @@
Proportion of cases that contain a missing or complete values. — prop-miss-complete-case • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Proportion of variables containing missings or complete values — prop-miss-complete-var • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the proportion of complete values — prop_complete • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return a vector of the proportion of complete values in each row — prop_complete_row • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return the proportion of missing values — prop_miss • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Return a vector of the proportion of missing values in each row — prop_miss_row • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Add special missing values to the shadow matrix — recode_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Objects exported from other packages — reexports • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -47,12 +54,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Replace values with missings — replace_to_na • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Replace values with missings — replace_with_na • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Replace all values with NA where a certain condition is met — replace_with_na_all • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Replace specified variables with NA where a certain condition is met — replace_with_na_at • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Replace values with NA based on some condition, for variables that meet some predicate — replace_with_na_if • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -9,18 +9,25 @@
The Behavioral Risk Factor Surveillance System (BRFSS) Survey
Data, 2009. — riskfactors • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -32,7 +39,7 @@
-
+
@@ -43,12 +50,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Scoped variants of impute_mean
— scoped-impute_mean • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
that satisfy a specific condition, use the scoped variants,
impute_mean_at, and impute_mean_if. To use _at effectively,
you must know that _at affects variables selected with a character vector, or with vars()." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Scoped variants of impute_median
— scoped-impute_median • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
that satisfy a specific condition, use the scoped variants,
impute_median_at, and impute_median_if. To use _at effectively,
you must know that _at affects variables selected with a character vector, or with vars()." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Create new levels of missing — shade • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -42,12 +49,15 @@
not missing, and NA indicates missingness. It also allows you to specify
some new missings, if you like. This function is what powers the factor
levels in as_shadow()." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Expand and relevel a shadow column with a new suffix — shadow_expand_relevel • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Reshape shadow data into a long format — shadow_long • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Shift missing values to facilitate missing data exploration/visualisation — shadow_shift • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -43,12 +50,15 @@
variables, the values are shifted to 10
variable plus some jittered noise, to separate repeated values, so that
missing values can be visualised along with the rest of the data." />
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Shift (impute) numeric values for graphical exploration — shadow_shift.numeric • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
stat_miss_point — stat_miss_point • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Test if input is a data.frame — test_if_dataframe • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Test if the input is Missing — test_if_missing • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Test if the input is NULL — test_if_null • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Test if input is a shadow — test_if_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Unbind (remove) shadow from data, and vice versa — unbinders • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Expand all shadow levels — update_shadow • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
check the levels of many things — what_levels • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Split a call into two components with a useful verb name — where • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -41,12 +48,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Which rows and cols contain missings? — where_na • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -40,12 +47,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Which variables are shades? — which_are_shade • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+
-
+
@@ -8,18 +8,25 @@
Which elements contain missings? — which_na • naniar
+
+
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
@@ -31,7 +38,7 @@
-
+
@@ -39,12 +46,15 @@
+
+
-
+
+