From 91205131f30e751b9530e10dbf8696b58263afc8 Mon Sep 17 00:00:00 2001 From: yshin12 Date: Mon, 6 Nov 2023 08:33:48 -0500 Subject: [PATCH] Path/studentization error fixed --- R/RcppExports.R | 12 ++++++------ R/sgd_lm.R | 18 +++++++++++++++--- R/sgd_qr.R | 19 ++++++++++++++----- R/sgdi_lm.R | 18 ++++++++++++++++-- R/sgdi_qr.R | 14 +++++++++++--- man/sgd_lm.Rd | 16 +++++++++++----- man/sgd_qr.Rd | 17 ++++++++++------- man/sgdi_lm.Rd | 16 +++++++++++----- man/sgdi_qr.Rd | 10 +++++----- src/RcppExports.cpp | 29 +++++++++++++++++------------ src/sgd_lm.cpp | 27 ++++++++++++++++++++++++--- src/sgd_qr.cpp | 26 ++++++++++++++++++++++++-- src/sgdi_lm.cpp | 35 ++++++++++++++++++++++++++++++----- src/sgdi_qr.cpp | 18 +++++++++--------- tests/testthat/test-sgdi_qr.R | 2 +- 15 files changed, 204 insertions(+), 73 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index a446dde..182d64d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,16 +1,16 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -sgd_lm_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd) { - .Call('_SGDinference_sgd_lm_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd) +sgd_lm_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd, path, path_index) { + .Call('_SGDinference_sgd_lm_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd, path, path_index) } -sgd_qr_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path) { - .Call('_SGDinference_sgd_qr_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path) +sgd_qr_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path, path_index) { + .Call('_SGDinference_sgd_qr_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path, path_index) } -sgdi_lm_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd) { - .Call('_SGDinference_sgdi_lm_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd) +sgdi_lm_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd, path, path_index) { + .Call('_SGDinference_sgdi_lm_cpp', PACKAGE = 'SGDinference', x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd, path, path_index) } sgdi_qr_cpp <- function(x, y, burn, gamma_0, alpha, bt_start, inference, tau, rss_idx, x_mean, x_sd, path, path_index) { diff --git a/R/sgd_lm.R b/R/sgd_lm.R index e756d36..1add53d 100644 --- a/R/sgd_lm.R +++ b/R/sgd_lm.R @@ -13,11 +13,14 @@ #' @param no_studentize numeric. The number of observations to compute the mean and std error for studentization. Default is 100. #' @param intercept logical. Use the intercept term for regressors. Default is TRUE. #' If this option is TRUE, the first element of the parameter vector is the intercept term. +#' @param path logical. The whole path of estimation results is out. Default is FALSE. +#' @param path_index numeric. A vector of indices to print out the path. Default is 1. #' #' @return #' An object of class \code{"sgdi"}, which is a list containing the following #' \describe{ #' \item{\code{coefficients}}{a vector of estimated parameter values} +#' \item{\code{path_coefficients}}{The path of coefficients.} #' } #' @note{The dimension of \code{coefficients} is (p+1) if \code{intercept}=TRUE or p otherwise.} #' @@ -40,8 +43,9 @@ sgd_lm = function(formula, bt_start = NULL, studentize = TRUE, no_studentize = 100L, - intercept = TRUE - ){ + intercept = TRUE, + path = FALSE, + path_index = c(1)){ cl <- match.call() mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) @@ -103,7 +107,7 @@ sgd_lm = function(formula, #---------------------------------------------- # Linear (Mean) Regression #---------------------------------------------- - out = sgd_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, x_mean=x_mean_in, x_sd=x_sd_in) + out = sgd_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, x_mean=x_mean_in, x_sd=x_sd_in, path=path, path_index=path_index) beta_hat = out$beta_hat # Re-scale parameters to reflect the studentization @@ -137,6 +141,14 @@ result.out$ci.lower = NULL result.out$ci.upper = NULL result.out$gamma_0 = gamma_0 +if (path){ + if (studentize){ + result.out$path_coefficients = (out$beta_hat_path) %*% t(rescale_matrix[path_index, path_index] ) + } else { + result.out$path_coefficients = out$beta_hat_path + } +} + return(result.out) } diff --git a/R/sgd_qr.R b/R/sgd_qr.R index babcfbd..8570140 100644 --- a/R/sgd_qr.R +++ b/R/sgd_qr.R @@ -14,12 +14,14 @@ #' @param no_studentize numeric. The number of observations to compute the mean and std error for studentization. Default is 100. #' @param intercept logical. Use the intercept term for regressors. Default is TRUE. #' If this option is TRUE, the first element of the parameter vector is the intercept term. -#' @param path logical. Compute the whole path of parameters. Default is FALSE. +#' @param path logical. The whole path of estimation results is out. Default is FALSE. +#' @param path_index numeric. A vector of indices to print out the path. Default is 1. #' #' @return #' An object of class \code{"sgdi"}, which is a list containing the following #' \describe{ #' \item{\code{coefficients}}{a vector of estimated parameter values} +#' \item{\code{path_coefficients}}{The path of coefficients.} #' } #' @note{The dimension of \code{coefficients} is (p+1) if \code{intercept}=TRUE or p otherwise.} #' @@ -44,8 +46,8 @@ sgd_qr = function(formula, studentize = TRUE, no_studentize = 100L, intercept = TRUE, - path = FALSE - ){ + path = FALSE, + path_index = c(1)){ cl <- match.call() mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) @@ -108,7 +110,7 @@ sgd_qr = function(formula, # Quantile Regression #---------------------------------------------- - out = sgd_qr_cpp(x, y, burn, gamma_0, alpha, bt_start=bt_start, tau=qt, x_mean=x_mean_in, x_sd=x_sd_in, path=path) + out = sgd_qr_cpp(x, y, burn, gamma_0, alpha, bt_start=bt_start, tau=qt, x_mean=x_mean_in, x_sd=x_sd_in, path=path, path_index=path_index) beta_hat = out$beta_hat # Re-scale parameters to reflect the studentization @@ -142,7 +144,14 @@ result.out$V <- NULL result.out$ci.lower = NULL result.out$ci.upper = NULL result.out$gamma_0 = gamma_0 - + +if (path){ + if (studentize){ + result.out$path_coefficients = (out$beta_hat_path) %*% t(rescale_matrix[path_index, path_index]) + } else { + result.out$path_coefficients = out$beta_hat_path + } +} return(result.out) } diff --git a/R/sgdi_lm.R b/R/sgdi_lm.R index 14db9ec..0b95e0a 100644 --- a/R/sgdi_lm.R +++ b/R/sgdi_lm.R @@ -18,6 +18,8 @@ #' @param rss_idx numeric. Index of x for random scaling subset inference. Default is 1, the first regressor of x. #' For example, if we want to focus on the 1st and 3rd covariates of x, then set it to be c(1,3). #' @param level numeric. The confidence level required. Default is 0.95. Can choose 0.90 and 0.80. +#' @param path logical. The whole path of estimation results is out. Default is FALSE. +#' @param path_index numeric. A vector of indices to print out the path. Default is 1. #' #' @return #' An object of class \code{"sgdi"}, which is a list containing the following @@ -26,6 +28,8 @@ #' \item{\code{var}}{A (p+1)x (p+1) variance-covariance matrix of \code{coefficient}} #' \item{\code{ci.lower}}{The lower part of the 95\% confidence interval} #' \item{\code{ci.upper}}{The upper part of the 95\% confidence interval} +#' \item{\code{level}}{The confidence level required. Default is 0.95.} +#' \item{\code{path_coefficients}}{The path of coefficients.} #' } #' @export #' @@ -49,7 +53,9 @@ sgdi_lm = function(formula, no_studentize = 100L, intercept = TRUE, rss_idx = c(1), - level = 0.95){ + level = 0.95, + path = FALSE, + path_index = c(1)){ cl <- match.call() mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) @@ -124,7 +130,7 @@ sgdi_lm = function(formula, #---------------------------------------------- # Linear (Mean) Regression #---------------------------------------------- - out = sgdi_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean=x_mean_in, x_sd=x_sd_in) + out = sgdi_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean=x_mean_in, x_sd=x_sd_in, path=path, path_index=path_index) beta_hat = out$beta_hat if (inference == "rs"){ V_out = out$V_hat @@ -205,6 +211,14 @@ sgdi_lm = function(formula, result.out$rss_idx_r = rss_idx_r } + if (path){ + if (studentize){ + result.out$path_coefficients = (out$beta_hat_path) %*% t(rescale_matrix[path_index, path_index]) + } else { + result.out$path_coefficients = out$beta_hat_path + } + } + return(result.out) } diff --git a/R/sgdi_qr.R b/R/sgdi_qr.R index 4fb7267..8f9caee 100644 --- a/R/sgdi_qr.R +++ b/R/sgdi_qr.R @@ -31,6 +31,8 @@ #' \item{\code{ci.lower}}{a vector of lower confidence limits} #' \item{\code{ci.upper}}{a vector of upper confidence limits} #' \item{\code{inference}}{character that specifies the inference method} +#' \item{\code{level}}{The confidence level required. Default is 0.95.} +#' \item{\code{path_coefficients}}{The path of coefficients.} #' } #' @note{The dimension of \code{coefficients} is (p+1) if \code{intercept}=TRUE or p otherwise. #' The random scaling matrix \code{V} is a full matrix if "rs" is chosen; @@ -188,9 +190,7 @@ sgdi_qr = function(formula, result.out$terms <- mt result.out$V <- V_out - if (path){ - result.out$beta_hat_path = out$beta_hat_path - } + if (level == 0.95) { critical.value = 6.747 # From Abadir and Paruolo (1997) Table 1. 97.5% @@ -228,6 +228,14 @@ sgdi_qr = function(formula, result.out$rss_idx_r = rss_idx_r } + if (path){ + if (studentize){ + result.out$path_coefficients = (out$beta_hat_path) %*% t(rescale_matrix[path_index, path_index]) + } else { + result.out$path_coefficients = out$beta_hat_path + } + } + return(result.out) } diff --git a/man/sgd_lm.Rd b/man/sgd_lm.Rd index 8f4dfb4..a57b72d 100644 --- a/man/sgd_lm.Rd +++ b/man/sgd_lm.Rd @@ -7,13 +7,15 @@ sgd_lm( formula, data, - gamma_0 = 1, - alpha = 0.667, + gamma_0 = NULL, + alpha = 0.501, burn = 1, bt_start = NULL, studentize = TRUE, no_studentize = 100L, - intercept = TRUE + intercept = TRUE, + path = FALSE, + path_index = c(1) ) } \arguments{ @@ -21,9 +23,9 @@ sgd_lm( \item{data}{an optional data frame containing variables in the model.} -\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 1.} +\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is NULL and it is determined by the adaptive method: 1/sd(y).} -\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.667.} +\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.501.} \item{burn}{numeric. A tuning parameter for "burn-in" observations. We burn-in up to (burn-1) observations and use observations from (burn) for estimation. Default is 1, i.e. no burn-in.} @@ -36,6 +38,10 @@ We burn-in up to (burn-1) observations and use observations from (burn) for esti \item{intercept}{logical. Use the intercept term for regressors. Default is TRUE. If this option is TRUE, the first element of the parameter vector is the intercept term.} + +\item{path}{logical. The whole path of estimation results is out. Default is FALSE.} + +\item{path_index}{numeric. A vector of indices to print out the path. Default is 1.} } \value{ An object of class \code{"sgdi"}, which is a list containing the following diff --git a/man/sgd_qr.Rd b/man/sgd_qr.Rd index 0d9688e..54cd198 100644 --- a/man/sgd_qr.Rd +++ b/man/sgd_qr.Rd @@ -7,15 +7,16 @@ sgd_qr( formula, data, - gamma_0 = 1, - alpha = 0.667, + gamma_0 = NULL, + alpha = 0.501, burn = 1, bt_start = NULL, qt = 0.5, studentize = TRUE, no_studentize = 100L, intercept = TRUE, - path = FALSE + path = FALSE, + path_index = c(1) ) } \arguments{ @@ -23,14 +24,14 @@ sgd_qr( \item{data}{an optional data frame containing variables in the model.} -\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 1.} +\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is NULL and it is determined by the adaptive method in Chet et al. (2023).} -\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.667.} +\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.501.} \item{burn}{numeric. A tuning parameter for "burn-in" observations. We burn-in up to (burn-1) observations and use observations from (burn) for estimation. Default is 1, i.e. no burn-in.} -\item{bt_start}{numeric. (p x 1) vector, excluding the intercept term. User-provided starting value. Default is NULL.} +\item{bt_start}{numeric. (p x 1) vector, excluding the intercept term. User-provided starting value. Default is NULL. Then, it is estimated by conquer.} \item{qt}{numeric. Quantile. Default is 0.5.} @@ -41,7 +42,9 @@ We burn-in up to (burn-1) observations and use observations from (burn) for esti \item{intercept}{logical. Use the intercept term for regressors. Default is TRUE. If this option is TRUE, the first element of the parameter vector is the intercept term.} -\item{path}{logical. Compute the whole path of parameters. Default is FALSE.} +\item{path}{logical. The whole path of estimation results is out. Default is FALSE.} + +\item{path_index}{numeric. A vector of indices to print out the path. Default is 1.} } \value{ An object of class \code{"sgdi"}, which is a list containing the following diff --git a/man/sgdi_lm.Rd b/man/sgdi_lm.Rd index cf24771..da8eccb 100644 --- a/man/sgdi_lm.Rd +++ b/man/sgdi_lm.Rd @@ -7,8 +7,8 @@ sgdi_lm( formula, data, - gamma_0 = 1, - alpha = 0.667, + gamma_0 = NULL, + alpha = 0.501, burn = 1, inference = "rs", bt_start = NULL, @@ -16,7 +16,9 @@ sgdi_lm( no_studentize = 100L, intercept = TRUE, rss_idx = c(1), - level = 0.95 + level = 0.95, + path = FALSE, + path_index = c(1) ) } \arguments{ @@ -24,9 +26,9 @@ sgdi_lm( \item{data}{an optional data frame containing variables in the model.} -\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 1.} +\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is NULL and it is determined by the adaptive method: 1/sd(y).} -\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.667.} +\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.501.} \item{burn}{numeric. A tuning parameter for "burn-in" observations. We burn-in up to (burn-1) observations and use observations from (burn) for estimation. Default is 1, i.e. no burn-in.} @@ -47,6 +49,10 @@ If this option is TRUE, the first element of the parameter vector is the interce For example, if we want to focus on the 1st and 3rd covariates of x, then set it to be c(1,3).} \item{level}{numeric. The confidence level required. Default is 0.95. Can choose 0.90 and 0.80.} + +\item{path}{logical. The whole path of estimation results is out. Default is FALSE.} + +\item{path_index}{numeric. A vector of indices to print out the path. Default is 1.} } \value{ An object of class \code{"sgdi"}, which is a list containing the following diff --git a/man/sgdi_qr.Rd b/man/sgdi_qr.Rd index 3d5d70f..fb68a25 100644 --- a/man/sgdi_qr.Rd +++ b/man/sgdi_qr.Rd @@ -7,8 +7,8 @@ sgdi_qr( formula, data, - gamma_0 = 1, - alpha = 0.667, + gamma_0 = NULL, + alpha = 0.501, burn = 1, inference = "rs", bt_start = NULL, @@ -27,9 +27,9 @@ sgdi_qr( \item{data}{an optional data frame containing variables in the model.} -\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 1.} +\item{gamma_0}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is NULL and it is determined by the adaptive method in Chet et al. (2023).} -\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.667.} +\item{alpha}{numeric. A tuning parameter for the learning rate (gamma_0 x t ^ alpha). Default is 0.501.} \item{burn}{numeric. A tuning parameter for "burn-in" observations. We burn-in up to (burn-1) observations and use observations from (burn) for estimation. Default is 1, i.e. no burn-in.} @@ -38,7 +38,7 @@ We burn-in up to (burn-1) observations and use observations from (burn) for esti "rss" is for ransom scaling subset inference. This option requires that "rss_indx" should be provided. "rsd" is for the diagonal elements of the random scaling matrix, excluding one for the intercept term.} -\item{bt_start}{numeric. (p x 1) vector, excluding the intercept term. User-provided starting value. Default is NULL.} +\item{bt_start}{numeric. (p x 1) vector, excluding the intercept term. User-provided starting value. Default is NULL. Then, it is estimated by conquer.} \item{qt}{numeric. Quantile. Default is 0.5.} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7117670..e4ceb50 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,8 +12,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // sgd_lm_cpp -List sgd_lm_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const arma::rowvec& x_mean, const arma::rowvec& x_sd); -RcppExport SEXP _SGDinference_sgd_lm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP) { +List sgd_lm_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const arma::rowvec& x_mean, const arma::rowvec& x_sd, const bool& path, const arma::colvec& path_index); +RcppExport SEXP _SGDinference_sgd_lm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP, SEXP pathSEXP, SEXP path_indexSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -25,13 +25,15 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::colvec& >::type bt_start(bt_startSEXP); Rcpp::traits::input_parameter< const arma::rowvec& >::type x_mean(x_meanSEXP); Rcpp::traits::input_parameter< const arma::rowvec& >::type x_sd(x_sdSEXP); - rcpp_result_gen = Rcpp::wrap(sgd_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd)); + Rcpp::traits::input_parameter< const bool& >::type path(pathSEXP); + Rcpp::traits::input_parameter< const arma::colvec& >::type path_index(path_indexSEXP); + rcpp_result_gen = Rcpp::wrap(sgd_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, x_mean, x_sd, path, path_index)); return rcpp_result_gen; END_RCPP } // sgd_qr_cpp -List sgd_qr_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const double& tau, const arma::rowvec& x_mean, const arma::rowvec& x_sd, const bool& path); -RcppExport SEXP _SGDinference_sgd_qr_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP tauSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP, SEXP pathSEXP) { +List sgd_qr_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const double& tau, const arma::rowvec& x_mean, const arma::rowvec& x_sd, const bool& path, const arma::colvec& path_index); +RcppExport SEXP _SGDinference_sgd_qr_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP tauSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP, SEXP pathSEXP, SEXP path_indexSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -45,13 +47,14 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::rowvec& >::type x_mean(x_meanSEXP); Rcpp::traits::input_parameter< const arma::rowvec& >::type x_sd(x_sdSEXP); Rcpp::traits::input_parameter< const bool& >::type path(pathSEXP); - rcpp_result_gen = Rcpp::wrap(sgd_qr_cpp(x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path)); + Rcpp::traits::input_parameter< const arma::colvec& >::type path_index(path_indexSEXP); + rcpp_result_gen = Rcpp::wrap(sgd_qr_cpp(x, y, burn, gamma_0, alpha, bt_start, tau, x_mean, x_sd, path, path_index)); return rcpp_result_gen; END_RCPP } // sgdi_lm_cpp -List sgdi_lm_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const std::string inference, const arma::uvec& rss_idx, const arma::rowvec& x_mean, const arma::rowvec& x_sd); -RcppExport SEXP _SGDinference_sgdi_lm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP inferenceSEXP, SEXP rss_idxSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP) { +List sgdi_lm_cpp(const arma::mat& x, const arma::colvec& y, const int& burn, const double& gamma_0, const double& alpha, const arma::colvec& bt_start, const std::string inference, const arma::uvec& rss_idx, const arma::rowvec& x_mean, const arma::rowvec& x_sd, const bool& path, const arma::colvec& path_index); +RcppExport SEXP _SGDinference_sgdi_lm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP burnSEXP, SEXP gamma_0SEXP, SEXP alphaSEXP, SEXP bt_startSEXP, SEXP inferenceSEXP, SEXP rss_idxSEXP, SEXP x_meanSEXP, SEXP x_sdSEXP, SEXP pathSEXP, SEXP path_indexSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -65,7 +68,9 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::uvec& >::type rss_idx(rss_idxSEXP); Rcpp::traits::input_parameter< const arma::rowvec& >::type x_mean(x_meanSEXP); Rcpp::traits::input_parameter< const arma::rowvec& >::type x_sd(x_sdSEXP); - rcpp_result_gen = Rcpp::wrap(sgdi_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd)); + Rcpp::traits::input_parameter< const bool& >::type path(pathSEXP); + Rcpp::traits::input_parameter< const arma::colvec& >::type path_index(path_indexSEXP); + rcpp_result_gen = Rcpp::wrap(sgdi_lm_cpp(x, y, burn, gamma_0, alpha, bt_start, inference, rss_idx, x_mean, x_sd, path, path_index)); return rcpp_result_gen; END_RCPP } @@ -94,9 +99,9 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_SGDinference_sgd_lm_cpp", (DL_FUNC) &_SGDinference_sgd_lm_cpp, 8}, - {"_SGDinference_sgd_qr_cpp", (DL_FUNC) &_SGDinference_sgd_qr_cpp, 10}, - {"_SGDinference_sgdi_lm_cpp", (DL_FUNC) &_SGDinference_sgdi_lm_cpp, 10}, + {"_SGDinference_sgd_lm_cpp", (DL_FUNC) &_SGDinference_sgd_lm_cpp, 10}, + {"_SGDinference_sgd_qr_cpp", (DL_FUNC) &_SGDinference_sgd_qr_cpp, 11}, + {"_SGDinference_sgdi_lm_cpp", (DL_FUNC) &_SGDinference_sgdi_lm_cpp, 12}, {"_SGDinference_sgdi_qr_cpp", (DL_FUNC) &_SGDinference_sgdi_qr_cpp, 13}, {NULL, NULL, 0} }; diff --git a/src/sgd_lm.cpp b/src/sgd_lm.cpp index 4a5aebe..f060668 100644 --- a/src/sgd_lm.cpp +++ b/src/sgd_lm.cpp @@ -10,7 +10,9 @@ List sgd_lm_cpp(const arma::mat& x, const double& alpha, const arma::colvec& bt_start, const arma::rowvec& x_mean, - const arma::rowvec& x_sd) { + const arma::rowvec& x_sd, + const bool& path, + const arma::colvec& path_index) { int n = y.n_elem; double learning_rate_new; arma::colvec gradient_bt_new; @@ -18,7 +20,13 @@ List sgd_lm_cpp(const arma::mat& x, int p = bt_t.n_elem; arma::colvec bar_bt_t; bar_bt_t.zeros(p); - + + int p_path = path_index.n_elem; + arma::mat bar_bt_path = arma::mat(n,p_path); + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(0, i_p_path) = bt_start(path_index(i_p_path)-1); + } + if (x_sd(0) < 0) { if (burn > 1) { for(int obs = 1; obs < (burn+1); obs++){ @@ -33,6 +41,12 @@ List sgd_lm_cpp(const arma::mat& x, gradient_bt_new = trans(x.row(obs-1)) * (x.row(obs-1) * bt_t - y(obs-1)); bt_t = bt_t - learning_rate_new * gradient_bt_new; bar_bt_t = ( bar_bt_t*(obs - burn - 1) + bt_t ) / (obs - burn); + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } } } else { if (burn > 1) { @@ -48,9 +62,16 @@ List sgd_lm_cpp(const arma::mat& x, gradient_bt_new = trans((x.row(obs-1)-x_mean)/x_sd) * ((x.row(obs-1)-x_mean)/x_sd * bt_t - y(obs-1)); bt_t = bt_t - learning_rate_new * gradient_bt_new; bar_bt_t = ( bar_bt_t*(obs - burn - 1) + bt_t ) / (obs - burn); + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } } } //------------------------------------------- - return List::create(Named("beta_hat") = bar_bt_t); + return List::create(Named("beta_hat") = bar_bt_t, + Named("beta_hat_path") = bar_bt_path); } diff --git a/src/sgd_qr.cpp b/src/sgd_qr.cpp index 8739d4f..f13d42b 100644 --- a/src/sgd_qr.cpp +++ b/src/sgd_qr.cpp @@ -12,7 +12,8 @@ List sgd_qr_cpp(const arma::mat& x, const double& tau, const arma::rowvec& x_mean, const arma::rowvec& x_sd, - const bool& path) { + const bool& path, + const arma::colvec& path_index) { int n = y.n_elem; double learning_rate_new; arma::colvec gradient_bt_new; @@ -21,6 +22,12 @@ List sgd_qr_cpp(const arma::mat& x, arma::colvec bar_bt_t; bar_bt_t.zeros(p); + int p_path = path_index.n_elem; + arma::mat bar_bt_path = arma::mat(n,p_path); + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(0, i_p_path) = bt_start(path_index(i_p_path)-1); + } + if (x_sd(0) < 0) { if (burn > 1) { for(int obs = 1; obs < (burn+1); obs++){ @@ -35,6 +42,13 @@ List sgd_qr_cpp(const arma::mat& x, gradient_bt_new = ( trans(x.row(obs-1)) * ( (y(obs-1) < as_scalar(x.row(obs-1) * bt_t)) - tau) ); bt_t = bt_t - learning_rate_new * gradient_bt_new; bar_bt_t = ( bar_bt_t*(obs - burn - 1) + bt_t ) / (obs - burn); + + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } } } else { if (burn > 1) { @@ -50,7 +64,15 @@ List sgd_qr_cpp(const arma::mat& x, gradient_bt_new = ( trans((x.row(obs-1)-x_mean)/x_sd) * ( (y(obs-1) < as_scalar((x.row(obs-1)-x_mean)/x_sd * bt_t)) - tau) ); bt_t = bt_t - learning_rate_new * gradient_bt_new; bar_bt_t = ( bar_bt_t*(obs - burn - 1) + bt_t ) / (obs - burn); + + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } } } - return List::create(Named("beta_hat") = bar_bt_t); + return List::create(Named("beta_hat") = bar_bt_t, + Named("beta_hat_path") = bar_bt_path); } diff --git a/src/sgdi_lm.cpp b/src/sgdi_lm.cpp index cc904e9..b1d2e14 100644 --- a/src/sgdi_lm.cpp +++ b/src/sgdi_lm.cpp @@ -12,16 +12,24 @@ List sgdi_lm_cpp(const arma::mat& x, const std::string inference, const arma::uvec& rss_idx, const arma::rowvec& x_mean, - const arma::rowvec& x_sd) { + const arma::rowvec& x_sd, + const bool& path, + const arma::colvec& path_index) { int n = y.n_elem; double learning_rate_new; arma::colvec gradient_bt_new; arma::colvec bt_t = bt_start; int p = bt_t.n_elem; int n_s; - arma::colvec bar_bt_t; - bar_bt_t.zeros(p); - + arma::colvec bar_bt_t = bt_start; + + // Initialize the path output variables when {path} is true + int p_path = path_index.n_elem; + arma::mat bar_bt_path = arma::mat(n,p_path); + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(0, i_p_path) = bt_start(path_index(i_p_path)-1); + } + arma::mat A_t = arma::mat(p,p); arma::vec b_t = arma::vec(p); double c_t = 0.0; @@ -53,6 +61,14 @@ List sgdi_lm_cpp(const arma::mat& x, bt_t = bt_t - learning_rate_new * gradient_bt_new; n_s = obs - burn; bar_bt_t = ( bar_bt_t*(n_s - 1) + bt_t ) / (n_s); + + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } + if ( inference == "rs") { A_t = A_t + std::pow(n_s, 2.0) * bar_bt_t * trans(bar_bt_t); b_t = b_t + std::pow(n_s, 2.0) * bar_bt_t; @@ -84,6 +100,14 @@ List sgdi_lm_cpp(const arma::mat& x, bt_t = bt_t - learning_rate_new * gradient_bt_new; n_s = obs - burn; bar_bt_t = ( bar_bt_t*(n_s - 1) + bt_t ) / (n_s); + + // Save the bar_bt_t for the path outcome + if (path) { + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)-1); + } + } + if ( inference == "rs") { A_t = A_t + std::pow(n_s, 2.0) * bar_bt_t * trans(bar_bt_t); b_t = b_t + std::pow(n_s, 2.0) * bar_bt_t; @@ -116,5 +140,6 @@ List sgdi_lm_cpp(const arma::mat& x, return List::create(Named("beta_hat") = bar_bt_t, Named("V_hat") = V_t, Named("V_hat_sub") = V_ts, - Named("V_hat_diag") = V_td); + Named("V_hat_diag") = V_td, + Named("beta_hat_path") = bar_bt_path); } \ No newline at end of file diff --git a/src/sgdi_qr.cpp b/src/sgdi_qr.cpp index c52b78e..4b2316b 100644 --- a/src/sgdi_qr.cpp +++ b/src/sgdi_qr.cpp @@ -23,14 +23,14 @@ List sgdi_qr_cpp(const arma::mat& x, int p = bt_t.n_elem; int n_s; arma::colvec bar_bt_t = bt_start; - // bar_bt_t.zeros(p); - + // Initialize the path output variables when {path} is true - //if (path) { - int p_path = path_index.n_elem; - arma::mat bar_bt_path = arma::mat(n,p_path); - //} + int p_path = path_index.n_elem; + arma::mat bar_bt_path = arma::mat(n,p_path); + for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { + bar_bt_path(0, i_p_path) = bt_start(path_index(i_p_path)-1); + } arma::mat A_t = arma::mat(p,p); arma::vec b_t = arma::vec(p); double c_t = 0.0; @@ -46,7 +46,7 @@ List sgdi_qr_cpp(const arma::mat& x, arma::vec A_td = arma::vec(p); arma::vec b_td = arma::vec(p); arma::vec V_td = arma::vec(p); - + if (x_sd(0) < 0) { // No studentization part if (burn > 1) { for(int obs = 1; obs < (burn+1); obs++){ @@ -66,7 +66,7 @@ List sgdi_qr_cpp(const arma::mat& x, // Save the bar_bt_t for the path outcome if (path) { for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { - bar_bt_path(obs, i_p_path) = bar_bt_t(path_index(i_p_path)); + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path) - 1 ); } } @@ -105,7 +105,7 @@ List sgdi_qr_cpp(const arma::mat& x, // Save the bar_bt_t for the path outcome if (path) { for (int i_p_path = 0; i_p_path < p_path; i_p_path++) { - bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path)); + bar_bt_path(obs-1, i_p_path) = bar_bt_t(path_index(i_p_path) - 1 ); } } // Rcout << bar_bt_t(2) << '\n'; diff --git a/tests/testthat/test-sgdi_qr.R b/tests/testthat/test-sgdi_qr.R index 8221cee..6e56ead 100644 --- a/tests/testthat/test-sgdi_qr.R +++ b/tests/testthat/test-sgdi_qr.R @@ -141,7 +141,7 @@ test_that("sgdi_qr path_index matters", { my.dat = data.frame(y=y, x=x) out1 = sgdi_qr(y~., data=my.dat, path=TRUE, path_index=1) out2 = sgdi_qr(y~., data=my.dat, path=TRUE, path_index=2) - check = max(abs(out1$beta_hat_path[100] - out2$beta_hat_path[100])) + check = max(abs(out1$path_coefficients[100] - out2$path_coefficients[100])) expect_false(check==0) })