Skip to content

Commit

Permalink
To v0.4.2
Browse files Browse the repository at this point in the history
  • Loading branch information
SixiangHu committed Dec 24, 2015
1 parent ac73878 commit b105725
Show file tree
Hide file tree
Showing 13 changed files with 162 additions and 97 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: DataMan
Type: Package
Title: This is an R package for data and model analysis
Title: a R package for data and model analysis
Version: 0.4.1
Date: 2015-12-04
Author: Sixiang Hu <sixiang.hu@gmail.com>
Expand All @@ -15,6 +15,7 @@ Imports:
Rcpp (>= 0.11.6),
dplyr,
rbokeh,
plotly,
ROCR,
gbm,
randomForest
Expand Down
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# Generated by roxygen2: do not edit by hand
useDynLib(DataMan)
importFrom(Rcpp, evalCpp)
exportPattern("^[[:alpha:]]+")

S3method(CramersV,data.frame)
S3method(CramersV,default)
Expand All @@ -27,22 +24,28 @@ export(DataSummary)
export(DetMiss)
export(PopMiss)
export(dataPlot)
export(interPlot)
export(liftPlot)
export(modelPlot)
export(resiPlot)
export(rocPlot)
export(tree2data)
importFrom(ROCR,performance)
importFrom(ROCR,prediction)
importFrom(Rcpp,evalCpp)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,dcast)
importFrom(data.table,key)
importFrom(data.table,setkey)
importFrom(dplyr,"%>%")
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,summarise)
importFrom(gbm,pretty.gbm.tree)
importFrom(plotly,layout)
importFrom(plotly,plot_ly)
importFrom(randomForest,getTree)
importFrom(rbokeh,figure)
importFrom(rbokeh,grid_plot)
Expand All @@ -52,3 +55,4 @@ importFrom(rbokeh,ly_hist)
importFrom(rbokeh,ly_lines)
importFrom(rbokeh,ly_points)
importFrom(rbokeh,y_axis)
useDynLib(DataMan)
4 changes: 4 additions & 0 deletions NEWS.Rmd → NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ DataMan 0.4.1
-------------------------------------------------------------
NEW FEATURES

* Add `interPlot` that gives 3D surface plot utilising `plotly` package.

* Add `liftPlot` that compares different model predictions.

* Add `rocPlot` that assesses binary classification under `ROCR` package. It plots ROC curves with one or more predictions. AUC will be calculated and showed in legend.
Expand All @@ -17,3 +19,5 @@ BUG FIXES AND MINOR IMPROVEMENTS
* Change the glyphs of points in `modelPlot`

* Change `.travis.yaml` to install `rbokeh` from github.

* Improve the `PopMiss`.
8 changes: 8 additions & 0 deletions R/DataMan.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#' DataMan a data clearning, analysis and statistical modeling assessing tool
#'
#' @description a data clearning, analysis and statistical modeling assessing tool.
#' @docType package
#' @name DataMan
#' @useDynLib DataMan
#' @importFrom Rcpp evalCpp
NULL
92 changes: 32 additions & 60 deletions R/PopMiss.r
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' @description This function allows you to populate missing values.
#' @usage PopMiss(data,na.treatment=c("mean.or.mode","delete","replace"),replace=NULL)
#' @param data This could be data frame, matrix or a vector.
#' @param data This could be data frame,data.table, matrix or a vector.
#' @param na.treatment One of "mean.or.mode", "delete", or "replace", to specify the method to populate missing values.
#' @param replace A single value used for populating ALL missing value. May not be useful for a data frame with missing values in different type of variable.
#' @seealso DetMiss
Expand All @@ -13,7 +13,13 @@
#' PopMiss(a,"mean.or.mode")

PopMiss <- function(data,na.treatment=c("mean.or.mode","delete","replace"),replace=NULL){
if (is.null(na.treatment)) na.treatment <- "mean.or.mode"

na.treatment <- match.arg(na.treatment)

if ((na.treatment %in% c("replace","r")) && is.null(replace))
stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")

UseMethod("PopMiss",data)
}

Expand All @@ -27,14 +33,13 @@ PopMiss.factor<-function(data,na.treatment,replace){
#' @rdname PopMiss
PopMiss.character<-function(data,na.treatment,replace){
num_miss <- sum(is.na(data))
if(isTRUE(na.treatment=="delete")) {
if(identical(na.treatment,"delete")) {
return(data[!is.na(data)])
}
else if(isTRUE(na.treatment=="mean.or.mode")){
else if(identical(na.treatment,"mean.or.mode")){
replace <- names(table(data))[order(table(data),decreasing=TRUE)[1]]
}

if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")
data[is.na(data)] <- replace
return(data)
}
Expand All @@ -43,13 +48,12 @@ PopMiss.character<-function(data,na.treatment,replace){
#' @rdname PopMiss
PopMiss.integer<-function(data,na.treatment,replace){
num_miss <- length(which(is.na(data)))
if(isTRUE(na.treatment=="delete")) {
if(identical(na.treatment,"delete")) {
return(na.omit(data))
}
else if(isTRUE(na.treatment=="mean.or.mode")){
else if(identical(na.treatment,"mean.or.mode")){
replace <- floor(mean(data,na.rm=TRUE))
}
if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")
data[is.na(data)] <- replace
return(data)
}
Expand All @@ -64,49 +68,45 @@ PopMiss.Date<-function(data,na.treatment,replace){
#' @rdname PopMiss
PopMiss.numeric<-function(data,na.treatment,replace){
num_miss <- length(which(is.na(data)))
if(isTRUE(na.treatment=="delete")) {
cat(paste(num_miss," rows have been delete.\n\n"))
if(identical(na.treatment,"delete")) {
return(na.omit(data))
}
else if(isTRUE(na.treatment=="mean.or.mode")){
else if(identical(na.treatment,"mean.or.mode")){
replace <- mean(data,na.rm=TRUE)
}
if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")

data[is.na(data)] <- replace
cat(paste(num_miss," row have been populated by value:",replace,".\n\n"))
return(data)
}

#' @export
#' @rdname PopMiss
PopMiss.data.frame<-function(data,na.treatment,replace){
num_miss <- which(sapply(data,function(x) any(is.na(x))))
if(isTRUE(na.treatment=="delete")) {
if(identical(na.treatment,"delete")) {
return(na.omit(data))
}
else if(isTRUE(na.treatment=="mean.or.mode")){
str_name <- names(data)
for(i in num_miss){
if(class(data[,str_name[i]]) %in% c("character","Date","factor"))
replace <- names(table(data[,str_name[i]]))[order(table(data[,str_name[i]]),decreasing=TRUE)[1]]
else if(class(data[,str_name[i]])=="integer" )
replace <- floor(mean(data[,str_name[i]],na.rm=TRUE))
else if(identical(na.treatment,"mean.or.mode")){
ind <- dim(data)[2]
for(i in 1:ind){
if(class(data[[i]]) %in% c("character","Date","factor"))
replace <- names(table(data[[i]]))[order(table(data[[i]]),decreasing=TRUE)[1]]
else if(class(data[[i]])=="integer" )
replace <- floor(mean(data[[i]],na.rm=TRUE))
else
replace <- mean(data[,str_name[i]],na.rm=TRUE)
data[is.na(data[,str_name[i]]),str_name[i]] <- replace
replace <- mean(data[[i]],na.rm=TRUE)
data[[i]][is.na(data[[i]])] <- replace
}

return(data)
}
else{
if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")

warning("Provided replacement will be used for all missing value.")
str_name <- names(data)
for(i in num_miss){
if ("Date" %in% class(data[,str_name[i]]) ) data[is.na(data[,str_name[i]]),str_name[i]] <- as.Date("1960-01-01")
else data[is.na(data[,str_name[i]]),str_name[i]] <- replace
ind <- dim(data)[2]
for(i in 1:ind){
if ("Date" %in% class(data[[i]]) )
data[[i]][is.na(data[[i]])] <- as.Date("1960-01-01")
else
data[[i]][is.na(data[[i]])] <- replace
}
return(data)
}
Expand All @@ -115,42 +115,14 @@ PopMiss.data.frame<-function(data,na.treatment,replace){
#' @export
#' @rdname PopMiss
PopMiss.data.table <-function(data,na.treatment,replace){
num_miss <- which(data[,sapply(.SD,function(x) any(is.na(x)))])
if(isTRUE(na.treatment=="delete")) {
return(na.omit(data))
}
else if(isTRUE(na.treatment=="mean.or.mode")){
str_name <- names(data)
for(i in num_miss){
if( data[,class(get(str_name[i]))] %in% c("character","Date","factor"))
replace <- as.data.frame(data[,.N,get(str_name[i])][order(-N)])[1,1]
else if( data[,class(get(str_name[i]))]=="integer" )
replace <- data[,floor(mean(get(str_name[i]),na.rm=TRUE))]
else
replace <- data[,mean(get(str_name[i]),na.rm=TRUE)]

data[which(is.na(data[,i,with=FALSE])),i] <- replace
}

return(data)
}
else{
if (is.null(replace)) stop("NULL value used for populating NAs. Chosen \"replace\" method, but didn't provide a value for replace parameter?")

warning("Provided replacement will be used for all missing value.")
str_name <- names(data)
for(i in num_miss){
if ("Date" %in% class(data[,i,with=FALSE])) data[which(is.na(data[,i,with=FALSE])),i] <- as.Date("1960-01-01")
data[which(is.na(data[,i,with=FALSE])),i] <- replace
}
return(data)
}
PopMiss.data.frame(data,na.treatment,replace)
}

#' @export
#' @rdname PopMiss
PopMiss.matrix <- function(data,na.treatment,replace){
if(identical(na.treatment,"delete")) warning("na.treatment is \"delete\" and 'data' is a matrix. This only works correctly if whole rows are missing")
if(identical(na.treatment,"delete"))
warning("na.treatment is \"delete\" and 'data' is a matrix. This only works correctly if whole rows are missing")
matrix(PopMiss(as.vector(data),na.treatment,replace),ncol=ncol(data))
}

Expand Down
2 changes: 0 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @rdname CramersV
CramersV_C <- function(x, y) {
.Call('DataMan_CramersV_C', PACKAGE = 'DataMan', x, y)
}

#' @rdname CramersV
CramersV_DF <- function(dm) {
.Call('DataMan_CramersV_DF', PACKAGE = 'DataMan', dm)
}
Expand Down
3 changes: 3 additions & 0 deletions R/dataPlot.r
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ dataPlot <- function(data,xvar,yvar,byvar=NULL,weights=NULL,
length.out=newGroupNum
)
x <- cut(x,new_band,include.lowest = TRUE)

# solve the order issue when plotting using rbokeh.
# can also set the label in cut function.
x <- as.character(paste(as.integer(x),as.character(x),sep=";"))
}

Expand Down
60 changes: 60 additions & 0 deletions R/interPlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' interPlot
#'
#' @description Interactive 3D plot by calling `plotly` package functions.
#' @usage interPlot(data,xvar,yvar,zvar)
#' @param data a data frame.
#' @param xvar either an integer to specify the position of the variable in the data frame, or the name of the variable.
#' @param yvar either an integer to specify the position of the variable in the data frame, or the name of the variable.
#' @param zvar either an integer to specify the position of the variable in the data frame, or the name of the variable.
#' @details This functions gives a 3D interactive view of between factors.
#' This is really useful when modeller wants to assess the interaction terms.
#' Hence the "zvar" can be actual response data or model predictions.
#' @author Sixiang Hu
#' @importFrom data.table data.table setkey key := dcast
#' @importFrom plotly plot_ly layout
#' @export interPlot
#' @examples
#'
#' interPlot(mtcars,"wt","mpg","vs")

interPlot <- function(data,xvar,yvar,zvar){
# Error Trap
if( .isDFnull(data) ) stop("data set provided is null.")
if( is.null(xvar) ) stop("X variable provided is null.")
if( is.null(yvar) ) stop("Y variable provided is null.")
if( is.null(zvar) ) stop("Z variable provided is null.")

# Find data column
posi <- .VarPosition(data,xvar)
x <- data[[posi$posi]]

posi <- .VarPosition(data,yvar)
y <- data[[posi$posi]]

posi <- .VarPosition(data,zvar)
z <- data[[posi$posi]]

dt <- data.table::data.table(x = x,
y = y,
z = z)
data.table::setkey(dt,x,y)

dt2 <- data.table::dcast(dt,x~y,fun.aggregate=mean,
na.rm=TRUE,
value.var="z",
drop=FALSE)

zvalue <- suppressWarnings(DataMan::PopMiss(as.matrix(dt2[,-1,with=FALSE]),
na.treatment = "replace",
0))

plotly::plot_ly(
x=dt2[[1]],
y=colnames(dt2),
z=zvalue,
type = "surface",
showlegend=FALSE) %>%
plotly::layout(xaxis=list(title=xvar),
yaxis=list(title=yvar),
zaxis=list(title=zvar))
}
8 changes: 1 addition & 7 deletions man/CramersV.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 0 additions & 23 deletions man/DataMan-package.Rd

This file was deleted.

11 changes: 11 additions & 0 deletions man/DataMan.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b105725

Please sign in to comment.