diff --git a/.Rhistory b/.Rhistory
deleted file mode 100644
index 1492a7d..0000000
--- a/.Rhistory
+++ /dev/null
@@ -1,512 +0,0 @@
-#' @title find the closest index
-#'
-#' @description
-#' This function finds the closest index to a given value in a vector.
-#'
-#' @param x a vector
-#' @param y a value
-#'
-#' @return the index of the closest value in the vector
-#'
-#' @examples
-#' find_max_index(c(1, 2, 3, 4, 5), 3.5)
-#' @export
-find_index <- function(x, y) {
-index <- which(x == y)
-if (length(index) >= 1) {
-index_name <- names(x)[index]
-value<-get_quantily_value(index_name)
-return(value)
-}
-else {
-closest_index <- which(abs(x - y) == min(abs(x-y)))
-closest_index_name <- names(x)[closest_index]
-value <- get_quantily_value(closest_index_name)
-return(value)
-}
-}
-#' @title find the right rank
-#'
-#' @description
-#' This function finds the right rank of a response value in a quantile random forest.
-#'
-#' @param response a vector of response values
-#' @param outMatrix a matrix of out values
-#' @param median_outMatrix a vector of median out values
-#' @param rmse_ a vector of rmse values
-#'
-#' @return a vector of ranks
-#'
-get_right_rank <- function(response,outMatrix,median_outMatrix,rmse_){
-rank_value <-c()
-for (i in 1:length(response)){
-rank_<- find_index(outMatrix[i,],response[i])
-if (length(rank_)>1){
-diff = response[i] -median_outMatrix[i]
-if (abs(diff)>3*rmse_ & diff<0 ){
-min_value <- min(rank_)
-rank_value<-c(rank_value,min_value)
-} else if (abs(diff)>3*rmse_ & diff>0) {
-max_value <- max(rank_)
-rank_value<-c(rank_value,max_value)
-}else {
-mean_value <- mean(rank_)
-rank_value<-c(rank_value,mean_value)
-}
-}else {
-rank_value<-c(rank_value,rank_)
-}
-}
-return(rank_value)
-}
-find_quantile_position <- function(x, data) {
-ecdf_data <- ecdf(data)
-return(ecdf_data(x))
-}
-#' @title find outliers
-#'
-#' @description
-#' This function finds outliers in a dataset using quantile random forests.
-#'
-#' @param data a data frame
-#' @param quantiles_type 'all':seq(from = 0.001, to = 0.999, by = 0.001),'other':c(threshold,0.5,1-threshold)
-#' @param threshold a threshold for outlier detection
-#' @param verbose a boolean value indicating whether to print verbose output
-#' @param ... additional arguments passed to the ranger function
-#'
-#' @return a data frame of outliers
-#'
-#' @examples
-#' outqrf(iris)
-#' @export
-outqrf <-function(data,
-quantiles_type=1000,
-threshold =0.025,
-verbose = 1,
-...){
-data <- as.data.frame(data)
-numeric_features <- names(data)[sapply(data,is.numeric)]
-threshold_low<-threshold
-threshold_high<-1-threshold
-rmse <-c()
-oob.error <-c()
-r.squared <-c()
-outliers <- data.frame()
-outMatrixs <- list()
-if(quantiles_type == 1000){
-quantiles <- seq(0.001, 0.999,0.001)
-}else if(quantiles_type == 400){
-quantiles <- c(seq(0.0025,0.9975,0.0025))
-}else{
-quantiles <- c(seq(0.025,0.9975,0.025))
-}
-if (verbose) {
-cat("\nOutlier identification by quantiles random forests\n")
-cat("\n  Variables to check:\t\t")
-cat(numeric_features, sep = ", ")
-cat("\n  Variables used to check:\t")
-cat(names(data), sep = ", ")
-cat("\n\n  Checking: ")
-}
-for (v in numeric_features){
-if (verbose) {
-cat(v, " ")
-}
-covariables <- setdiff(names(data), v)
-qrf <- ranger::ranger(
-formula = stats::reformulate(covariables, response = v),
-data = data,
-quantreg = TRUE,
-...)
-pred <- predict(qrf, data[,covariables], type = "quantiles",quantiles=quantiles)
-oob.error <- c(oob.error,qrf$prediction.error)
-r.squared <- c(qrf$r.squared,r.squared)
-outMatrix <- pred$predictions
-outMatrixs[[v]]<-outMatrix
-median_outMatrix <- outMatrix[,(length(quantiles)+1)/2]
-response<- data[,v]
-diffs = response - median_outMatrix
-rmse_ <- sqrt(sum(diffs*diffs)/(length(diffs)-1))
-rmse <- c(rmse,rmse_)
-rank_value <- get_right_rank(response,outMatrix,median_outMatrix,rmse_)
-#rank_value <- find_quantile_position(response,outMatrix)
-outlier <- data.frame(row = as.numeric(row.names(data)),col = v,observed = response, predicted = median_outMatrix,rank = rank_value)
-outlier<- outlier|>dplyr::filter(rank<=threshold_low| rank>=threshold_high)
-outliers <- rbind(outliers,outlier)
-}
-names(rmse) <- numeric_features
-names(oob.error) <- numeric_features
-names(r.squared) <- numeric_features
-list(
-Data = data,
-outliers = outliers,
-n_outliers = table(outliers$col),
-threshold = threshold,
-rmse = rmse,
-oob.error = oob.error,
-r.squared = r.squared,
-outMatrixs =outMatrixs
-)
-}
-qrf = outqrf(data,quantiles_type=400)
-qrf$outliers
-qrf$n_outliers
-qrf$oob.error
-qrf$r.squared
-library(outqrf)
-out <- outForest(data)
-out$outliers
-qrf = outqrf(data,quantiles_type=40)
-#' @title get numberic value from string
-#'
-#' @description
-#' This function extracts the numeric value from a string.
-#'
-#' @param name a string
-#'
-#' @return a numeric value
-#'
-#' @examples
-#' get_quantily_value("quantiles = 0.001")
-#' @export
-get_quantily_value <- function(name){
-str<- gsub("[^0-9.]", "", name)
-value <- as.numeric(str)
-return(value)
-}
-#' @title find the closest index
-#'
-#' @description
-#' This function finds the closest index to a given value in a vector.
-#'
-#' @param x a vector
-#' @param y a value
-#'
-#' @return the index of the closest value in the vector
-#'
-#' @examples
-#' find_max_index(c(1, 2, 3, 4, 5), 3.5)
-#' @export
-find_index <- function(x, y) {
-index <- which(x == y)
-if (length(index) >= 1) {
-index_name <- names(x)[index]
-value<-get_quantily_value(index_name)
-return(value)
-}
-else {
-closest_index <- which(abs(x - y) == min(abs(x-y)))
-closest_index_name <- names(x)[closest_index]
-value <- get_quantily_value(closest_index_name)
-return(value)
-}
-}
-#' @title find the right rank
-#'
-#' @description
-#' This function finds the right rank of a response value in a quantile random forest.
-#'
-#' @param response a vector of response values
-#' @param outMatrix a matrix of out values
-#' @param median_outMatrix a vector of median out values
-#' @param rmse_ a vector of rmse values
-#'
-#' @return a vector of ranks
-#'
-get_right_rank <- function(response,outMatrix,median_outMatrix,rmse_){
-rank_value <-c()
-for (i in 1:length(response)){
-rank_<- find_index(outMatrix[i,],response[i])
-if (length(rank_)>1){
-diff = response[i] -median_outMatrix[i]
-if (abs(diff)>3*rmse_ & diff<0 ){
-min_value <- min(rank_)
-rank_value<-c(rank_value,min_value)
-} else if (abs(diff)>3*rmse_ & diff>0) {
-max_value <- max(rank_)
-rank_value<-c(rank_value,max_value)
-}else {
-mean_value <- mean(rank_)
-rank_value<-c(rank_value,mean_value)
-}
-}else {
-rank_value<-c(rank_value,rank_)
-}
-}
-return(rank_value)
-}
-find_quantile_position <- function(x, data) {
-ecdf_data <- ecdf(data)
-return(ecdf_data(x))
-}
-#' @title find outliers
-#'
-#' @description
-#' This function finds outliers in a dataset using quantile random forests.
-#'
-#' @param data a data frame
-#' @param quantiles_type 'all':seq(from = 0.001, to = 0.999, by = 0.001),'other':c(threshold,0.5,1-threshold)
-#' @param threshold a threshold for outlier detection
-#' @param verbose a boolean value indicating whether to print verbose output
-#' @param ... additional arguments passed to the ranger function
-#'
-#' @return a data frame of outliers
-#'
-#' @examples
-#' outqrf(iris)
-#' @export
-outqrf <-function(data,
-quantiles_type=1000,
-threshold =0.025,
-verbose = 1,
-...){
-data <- as.data.frame(data)
-numeric_features <- names(data)[sapply(data,is.numeric)]
-threshold_low<-threshold
-threshold_high<-1-threshold
-rmse <-c()
-oob.error <-c()
-r.squared <-c()
-outliers <- data.frame()
-outMatrixs <- list()
-if(quantiles_type == 1000){
-quantiles <- seq(0.001, 0.999,0.001)
-}else if(quantiles_type == 400){
-quantiles <- c(seq(0.0025,0.9975,0.0025))
-}else{
-quantiles <- c(seq(0.025,0.9975,0.025))
-}
-if (verbose) {
-cat("\nOutlier identification by quantiles random forests\n")
-cat("\n  Variables to check:\t\t")
-cat(numeric_features, sep = ", ")
-cat("\n  Variables used to check:\t")
-cat(names(data), sep = ", ")
-cat("\n\n  Checking: ")
-}
-for (v in numeric_features){
-if (verbose) {
-cat(v, " ")
-}
-covariables <- setdiff(names(data), v)
-qrf <- ranger::ranger(
-formula = stats::reformulate(covariables, response = v),
-data = data,
-quantreg = TRUE,
-...)
-pred <- predict(qrf, data[,covariables], type = "quantiles",quantiles=quantiles)
-oob.error <- c(oob.error,qrf$prediction.error)
-r.squared <- c(qrf$r.squared,r.squared)
-outMatrix <- pred$predictions
-outMatrixs[[v]]<-outMatrix
-median_outMatrix <- outMatrix[,(length(quantiles)+1)/2]
-response<- data[,v]
-diffs = response - median_outMatrix
-rmse_ <- sqrt(sum(diffs*diffs)/(length(diffs)-1))
-rmse <- c(rmse,rmse_)
-#rank_value <- get_right_rank(response,outMatrix,median_outMatrix,rmse_)
-rank_value <- find_quantile_position(response,outMatrix)
-outlier <- data.frame(row = as.numeric(row.names(data)),col = v,observed = response, predicted = median_outMatrix,rank = rank_value)
-outlier<- outlier|>dplyr::filter(rank<=threshold_low| rank>=threshold_high)
-outliers <- rbind(outliers,outlier)
-}
-names(rmse) <- numeric_features
-names(oob.error) <- numeric_features
-names(r.squared) <- numeric_features
-list(
-Data = data,
-outliers = outliers,
-n_outliers = table(outliers$col),
-threshold = threshold,
-rmse = rmse,
-oob.error = oob.error,
-r.squared = r.squared,
-outMatrixs =outMatrixs
-)
-}
-qrf = outqrf(data,quantiles_type=400)
-qrf$n_outliers
-qrf$outliers
-setwd("E:/github/outqrf")
-system("R CMD build outqrf")
-system("R CMD build outqrf")
-renv::status()
-setwd("E:/github")
-system("R CMD build outqrf")
-setwd("E:/github/outqrf")
-system("R CMD check --as-cran")
-setwd("E:/github")
-system("R CMD check --as-cran")
-system("R CMD check --as-cran outqrf")
-system("R CMD check --as-cran outqrf")
-system("R CMD check --as-cran outqrf")
-system("R CMD check --as-cran outqrf")
-setwd("E:/github")
-system("R CMD check --as-cran outqrf")
-setwd("E:/github/outqrf")
-system("R CMD check --as-cran outqrf")
-setwd("E:/github")
-setwd("E:/github/outqrf")
-setwd("E:/github")
-system("R CMD check --as-cran outqrf")
-system("R CMD check --as-cran outqrf")
-system("R CMD check --as-cran outqrf")
-#' @title Plots outqrf
-#' @description
-#' This function can plot paired boxplot of an "outqrf" object.
-#' It helps us to better observe the relationship between the original and predicted values
-#' @param qrf An object of class "outqrf".
-#' @returns A ggplot2 object
-#' @export
-#' @examples
-#' irisWithOutliers <- generateOutliers(iris, seed = 2024)
-#' qrf <- outqrf(irisWithOutliers)
-#' plot(qrf)
-plot.outqrf<- function(qrf) {
-result_df <- data.frame()
-data <- qrf$Data
-for (i in seq_along(qrf$outMatrixs)) {
-temp_df <- as.data.frame(qrf$outMatrixs[[i]][,qrf$quantiles_type/2])
-if (nrow(result_df) == 0) {
-result_df <- temp_df
-} else {
-result_df <- cbind(result_df, temp_df)
-}
-}
-names(result_df) = names(qrf$outMatrixs)
-result_df <- dplyr::mutate(result_df,tag = "predicted")
-numeric_features <- names(data)[sapply(data,is.numeric)]
-data <- data[numeric_features]
-data <- dplyr::mutate(data,tag = "observed")
-plot_in <-rbind(result_df,data)
-plot_in_longer<- plot_in|>tidyr::pivot_longer(!tag,names_to ="features",values_to ="value" )
-p<- ggpubr::ggpaired(plot_in_longer, x="tag", y="value",
-fill="tag", palette = "jco",
-line.color = "grey", line.size =0.8, width = 0.4,short.panel.labs = FALSE)+
-ggpubr::stat_compare_means(label = "p.format", paired = TRUE)+ggplot2::theme(legend.position = "none")+ggplot2::facet_wrap(~features, scales = "free")
-return(p)
-}
-irisWithOutliers <- generateOutliers(iris, seed = 2024)
-library(outqrf)
-#' @title Plots outqrf
-#' @description
-#' This function can plot paired boxplot of an "outqrf" object.
-#' It helps us to better observe the relationship between the original and predicted values
-#' @param qrf An object of class "outqrf".
-#' @returns A ggplot2 object
-#' @export
-#' @examples
-#' irisWithOutliers <- generateOutliers(iris, seed = 2024)
-#' qrf <- outqrf(irisWithOutliers)
-#' plot(qrf)
-plot.outqrf<- function(qrf) {
-result_df <- data.frame()
-data <- qrf$Data
-for (i in seq_along(qrf$outMatrixs)) {
-temp_df <- as.data.frame(qrf$outMatrixs[[i]][,qrf$quantiles_type/2])
-if (nrow(result_df) == 0) {
-result_df <- temp_df
-} else {
-result_df <- cbind(result_df, temp_df)
-}
-}
-names(result_df) = names(qrf$outMatrixs)
-result_df <- dplyr::mutate(result_df,tag = "predicted")
-numeric_features <- names(data)[sapply(data,is.numeric)]
-data <- data[numeric_features]
-data <- dplyr::mutate(data,tag = "observed")
-plot_in <-rbind(result_df,data)
-plot_in_longer<- plot_in|>tidyr::pivot_longer(!tag,names_to ="features",values_to ="value" )
-p<- ggpubr::ggpaired(plot_in_longer, x="tag", y="value",
-fill="tag", palette = "jco",
-line.color = "grey", line.size =0.8, width = 0.4,short.panel.labs = FALSE)+
-ggpubr::stat_compare_means(label = "p.format", paired = TRUE)+ggplot2::theme(legend.position = "none")+ggplot2::facet_wrap(~features, scales = "free")
-return(p)
-}
-irisWithOutliers <- generateOutliers(iris, seed = 2024)
-qrf <- outqrf(irisWithOutliers)
-plot(qrf)
-#' @title Plots outqrf
-#' @description
-#' This function can plot paired boxplot of an "outqrf" object.
-#' It helps us to better observe the relationship between the original and predicted values
-#' @param qrf An object of class "outqrf".
-#' @returns A ggplot2 object
-#' @export
-#' @examples
-#' irisWithOutliers <- generateOutliers(iris, seed = 2024)
-#' qrf <- outqrf(irisWithOutliers)
-#' plot(qrf)
-plot.outqrf<- function(x) {
-result_df <- data.frame()
-data <- x$Data
-for (i in seq_along(x$outMatrixs)) {
-temp_df <- as.data.frame(x$outMatrixs[[i]][,x$quantiles_type/2])
-if (nrow(result_df) == 0) {
-result_df <- temp_df
-} else {
-result_df <- cbind(result_df, temp_df)
-}
-}
-names(result_df) = names(x$outMatrixs)
-result_df <- dplyr::mutate(result_df,tag = "predicted")
-numeric_features <- names(data)[sapply(data,is.numeric)]
-data <- data[numeric_features]
-data <- dplyr::mutate(data,tag = "observed")
-plot_in <-rbind(result_df,data)
-plot_in_longer<- plot_in|>tidyr::pivot_longer(!tag,names_to ="features",values_to ="value" )
-p<- ggpubr::ggpaired(plot_in_longer, x="tag", y="value",
-fill="tag", palette = "jco",
-line.color = "grey", line.size =0.8, width = 0.4,short.panel.labs = FALSE)+
-ggpubr::stat_compare_means(label = "p.format", paired = TRUE)+ggplot2::theme(legend.position = "none")+ggplot2::facet_wrap(~features, scales = "free")
-return(p)
-}
-plot(qrf)
-importFrom("stats", "predict", "rnorm", "sd")
-S3method
-system("R CMD check --as-cran outqrf")
-#' @title Plots outqrf
-#' @description
-#' This function can plot paired boxplot of an "outqrf" object.
-#' It helps us to better observe the relationship between the original and predicted values
-#' @param qrf An object of class "outqrf".
-#' @returns A ggplot2 object
-#' @export
-#' @examples
-#' irisWithOutliers <- generateOutliers(iris, seed = 2024)
-#' qrf <- outqrf(irisWithOutliers)
-#' plot(qrf)
-plot<- function(x) {
-result_df <- data.frame()
-data <- x$Data
-for (i in seq_along(x$outMatrixs)) {
-temp_df <- as.data.frame(x$outMatrixs[[i]][,x$quantiles_type/2])
-if (nrow(result_df) == 0) {
-result_df <- temp_df
-} else {
-result_df <- cbind(result_df, temp_df)
-}
-}
-names(result_df) = names(x$outMatrixs)
-result_df <- dplyr::mutate(result_df,tag = "predicted")
-numeric_features <- names(data)[sapply(data,is.numeric)]
-data <- data[numeric_features]
-data <- dplyr::mutate(data,tag = "observed")
-plot_in <-rbind(result_df,data)
-plot_in_longer<- plot_in|>tidyr::pivot_longer(!tag,names_to ="features",values_to ="value" )
-p<- ggpubr::ggpaired(plot_in_longer, x="tag", y="value",
-fill="tag", palette = "jco",
-line.color = "grey", line.size =0.8, width = 0.4,short.panel.labs = FALSE)+
-ggpubr::stat_compare_means(label = "p.format", paired = TRUE)+ggplot2::theme(legend.position = "none")+ggplot2::facet_wrap(~features, scales = "free")
-return(p)
-}
-plot(qrf)
-devtools::document()
-setwd("E:/github/outqrf")
-devtools::document()
-library(outqrf)
-irisWithOutliers <- generateOutliers(iris, p = 0.05,seed =2024)
-qrf <- outqrf(irisWithOutliers,quantiles_type=400)
-plot(qrf)
-devtools::check()
-pkgdown::build_site()