Skip to content

Commit

Permalink
Add resiPlot function
Browse files Browse the repository at this point in the history
  • Loading branch information
SixiangHu committed Sep 22, 2015
1 parent a56c837 commit 299faa3
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 28 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,20 @@ export(DetMiss)
export(PopMiss)
export(dataPlot)
export(modelPlot)
export(resiPlot)
export(tree2data)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,setkey)
importFrom(ggplot2,aes)
importFrom(ggplot2,element_text)
importFrom(ggplot2,ggplot)
importFrom(googleVis,gvisComboChart)
importFrom(grid,grid.newpage)
importFrom(grid,pushViewport)
importFrom(grid,unit.pmax)
importFrom(grid,viewport)
importFrom(gridExtra,grid.arrange)
importFrom(reshape2,melt)
importFrom(scales,date_format)
Expand Down
18 changes: 9 additions & 9 deletions R/DataSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @details This function provides a data summary including min, max, number of unique values and number if missing values.
#' The min and max will ignore missing value in the data. The input should be a `data.frame`.
#' @author Sixiang Hu
#' @importFrom data.table as.data.table :=
#' @importFrom data.table data.table :=
#' @export DataSummary
#' @examples
#' DataSummary(cars)
Expand All @@ -34,7 +34,7 @@ DataSummary.data.frame <- function(data,wt=NULL,sparkline=FALSE){
dsMean <- sapply(data,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(weighted.mean(x,weight,na.rm = TRUE),6))
else {
x.dt<-data.table(x,weight)
x.dt<-data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(-V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(-V1)][2,list(x)])
else dsTemp
Expand All @@ -44,7 +44,7 @@ DataSummary.data.frame <- function(data,wt=NULL,sparkline=FALSE){
dsMax <- sapply(data,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(max(x,na.rm = TRUE),6))
else {
x.dt<-data.table(x,weight)
x.dt<-data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(-V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(-V1)][2,list(x)])
else dsTemp
Expand All @@ -54,7 +54,7 @@ DataSummary.data.frame <- function(data,wt=NULL,sparkline=FALSE){
dsMin <- sapply(data,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(min(x,na.rm = TRUE),6))
else {
x.dt<-data.table(x,weight)
x.dt<-data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(V1)][2,list(x)])
else dsTemp
Expand All @@ -63,7 +63,7 @@ DataSummary.data.frame <- function(data,wt=NULL,sparkline=FALSE){

if (sparkline) {
dsStr <- sapply(data,function(x) {
freq <- data.table(x)[,.N,by=x][order(x)][,f:=N/sum(N)]
freq <- data.table::data.table(x)[,.N,by=x][order(x)][,f:=N/sum(N)]
if (dim(freq)[1]>=100) return("More than 100 levels")
else return(paste0(round(freq[,f],3),collapse= ","))
})
Expand Down Expand Up @@ -104,7 +104,7 @@ DataSummary.data.table <- function(data,wt=NULL,sparkline=FALSE){
dsMean <- data[,sapply(.SD,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(weighted.mean(x,weight,na.rm = TRUE),6))
else {
x.dt<- data.table(x,weight)
x.dt<- data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(-V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(-V1)][2,list(x)])
else dsTemp
Expand All @@ -114,7 +114,7 @@ DataSummary.data.table <- function(data,wt=NULL,sparkline=FALSE){
dsMax <- data[,sapply(.SD,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(max(x,na.rm = TRUE),6))
else {
x.dt<-data.table(x,weight)
x.dt<-data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(-V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(-V1)][2,list(x)])
else dsTemp
Expand All @@ -124,7 +124,7 @@ DataSummary.data.table <- function(data,wt=NULL,sparkline=FALSE){
dsMin <- data[,sapply(data,function(x){
if(is.numeric(x) || is.integer(x)) as.character(round(min(x,na.rm = TRUE),6))
else {
x.dt<-data.table(x,weight)
x.dt<-data.table::data.table(x,weight)
dsTemp <- as.character(x.dt[,sum(weight),by=x][order(V1)][1,list(x)])
if(is.null(dsTemp) || is.na(dsTemp)) as.character(x.dt[,sum(weight),by=x][order(V1)][2,list(x)])
else dsTemp
Expand All @@ -133,7 +133,7 @@ DataSummary.data.table <- function(data,wt=NULL,sparkline=FALSE){

if (sparkline) {
dsStr <- data[,sapply(data,function(x) {
freq <- data.table(x)[,.N,by=x][order(x)][,f:=N/sum(N)]
freq <- data.table::data.table(x)[,.N,by=x][order(x)][,f:=N/sum(N)]
if (dim(freq)[1]>=100) return("More than 100 levels")
else return(paste0(round(freq[,f],3),collapse= ","))
})]
Expand Down
26 changes: 8 additions & 18 deletions R/PopMiss.r
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,12 @@ PopMiss <- function(data,na.treatment=c("mean.or.mode","delete","replace"),repla
}

#' @export
PopMiss.factor<-function(data,na.treatment,replace=NULL){
num_miss <- length(which(is.na(data)))
if(isTRUE(na.treatment=="delete")) {
return(na.omit(data))
}
else if(isTRUE(na.treatment=="mean.or.mode")){
replace <- names(table(data))[order(table(data),decreasing=TRUE)[1]]
}

if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")
data[is.na(data)] <- replace
return(data)
PopMiss.factor<-function(data,na.treatment,replace){
PopMiss.character(data,na.treatment,replace)
}

#' @export
PopMiss.character<-function(data,na.treatment,replace=NULL){
PopMiss.character<-function(data,na.treatment,replace){
num_miss <- sum(is.na(data))
if(isTRUE(na.treatment=="delete")) {
return(data[!is.na(data)])
Expand All @@ -48,7 +38,7 @@ PopMiss.character<-function(data,na.treatment,replace=NULL){
}

#' @export
PopMiss.integer<-function(data,na.treatment,replace=NULL){
PopMiss.integer<-function(data,na.treatment,replace){
num_miss <- length(which(is.na(data)))
if(isTRUE(na.treatment=="delete")) {
return(na.omit(data))
Expand All @@ -62,7 +52,7 @@ PopMiss.integer<-function(data,na.treatment,replace=NULL){
}

#' @export
PopMiss.numeric<-function(data,na.treatment,replace=NULL){
PopMiss.numeric<-function(data,na.treatment,replace){
num_miss <- length(which(is.na(data)))
if(isTRUE(na.treatment=="delete")) {
cat(paste(num_miss," rows have been delete.\n\n"))
Expand All @@ -79,7 +69,7 @@ PopMiss.numeric<-function(data,na.treatment,replace=NULL){
}

#' @export
PopMiss.data.frame<-function(data,na.treatment,replace=NULL){
PopMiss.data.frame<-function(data,na.treatment,replace){
num_miss <- which(sapply(data,function(x) any(is.na(x))))
if(isTRUE(na.treatment=="delete")) {
return(na.omit(data))
Expand Down Expand Up @@ -111,7 +101,7 @@ PopMiss.data.frame<-function(data,na.treatment,replace=NULL){
}

#' @export
PopMiss.data.table <-function(data,na.treatment,replace=NULL){
PopMiss.data.table <-function(data,na.treatment,replace){
num_miss <- which(data[,sapply(.SD,function(x) any(is.na(x)))])
if(isTRUE(na.treatment=="delete")) {
return(na.omit(data))
Expand Down Expand Up @@ -144,7 +134,7 @@ PopMiss.data.table <-function(data,na.treatment,replace=NULL){
}

#' @export
PopMiss.matrix <- function(data,na.treatment,replace=NULL){
PopMiss.matrix <- function(data,na.treatment,replace){
if(identical(na.treatment,"delete")) warning("na.treatment is \"delete\" and 'data' is a matrix. This only works correctly if whole rows are missing")
matrix(PopMiss(as.vector(data),na.treatment,replace),ncol=ncol(data))
}
1 change: 0 additions & 1 deletion R/resiPlot → R/resiPlot.r
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#' @importFrom ggplot2 aes ggplot
#' @importFrom grid grid.newpage viewport pushViewport
#' @export resiPlot
#' @examples
#'

resiPlot <- function(act,pred,bucket=20){
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ iris.mod <- gbm(Species ~ ., distribution="multinomial", data=iris, n.trees=2000
tree_data <- tree2data(iris.mod,1)
sankeyNetwork(tree_data[[1]],tree_data[[2]],Source="src",Target="tar",Value="value",NodeID="name")


* `resiPlot`: This function assess the residual using given actual and predicted values.

```

### Getting Started
Expand Down
32 changes: 32 additions & 0 deletions man/resiPlot.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/resiPlot.r
\name{resiPlot}
\alias{resiPlot}
\title{resiPlot}
\usage{
resiPlot(act,pred,bucket=20)
}
\arguments{
\item{act}{numerical vector for actual observation.}

\item{pred}{numerical vector for model preidctions. It must have the same length as act.}

\item{bucket}{Integer. It specifies the number of bucket of the AvsE plot.}
}
\description{
This function assess the residual using given actual and predicted values.
}
\details{
Currently, the residual in this function is defined as: Residual = actual - predicted. I don't use `resi` or `reisdual` function
from `stats` package because this function will be used for much wider model assess (e.g. `randomFoest`, `gbm`), but 2 functions
mentioned about can only applied to `glm` and `lm`. May code in some other residual function.
This function will give 3 plots:
Residual vs Prediction: This plot is used to assess the baise and heterogeneity
Residual histogram: Check the residual distribution.
AvsE plot: The aggregated (by provided number of bucket) average actual and predicted value, with a diagnal line for comparison.
}
\author{
Sixiang Hu
}

0 comments on commit 299faa3

Please sign in to comment.