update for names of R functions to with underscores

hta-pharma · Jun 20, 2023 · 567b256 · 567b256
1 parent 12fa4bb
commit 567b256
Show file tree

Hide file tree

Showing 6 changed files with 82 additions and 514 deletions.
diff --git a/R/bucher.R b/R/bucher.R
@@ -9,24 +9,24 @@
 #'
 #' @param trt a list with two named scalars for the study with interested experimental arm, one named 'est' for the point estimate, and the other named 'se' for the standard error
 #' @param com same as \code{trt}, but for the study with interested control arm
-#' @param conf.lv a numerical scalar, prescribe confidence level to derive two-sided confidence interval for the adjusted treatment effect
+#' @param conf_lv a numerical scalar, prescribe confidence level to derive two-sided confidence interval for the adjusted treatment effect
 #'
 #' @return a list with 5 elements,
 #' \describe{
 #'   \item est - a scalar, point estimate of the adjusted treatment effect
 #'   \item se - a scalar, standard error of the adjusted treatment effect (i.e. \code{est} in return)
-#'   \item ci_l - a scalor, lower confidence limit of a two-sided CI with prescribed nominal level by \code{conf.lv}
-#'   \item ci_u - a scalor, upper confidence limit of a two-sided CI with prescribed nominal level by \code{conf.lv}
+#'   \item ci_l - a scalor, lower confidence limit of a two-sided CI with prescribed nominal level by \code{conf_lv}
+#'   \item ci_u - a scalor, upper confidence limit of a two-sided CI with prescribed nominal level by \code{conf_lv}
 #'   \item pval - p-value of Z-test, with null hypothesis that \code{est} is zero
 #' }
 #' @export
 #'
 #' @examples
-bucher <- function(trt, com, conf.lv = 0.95) {
+bucher <- function(trt, com, conf_lv = 0.95) {
   est <- trt$est - com$est
   se <- sqrt(trt$se^2 + com$se^2)
-  ci_l <- est - qnorm(0.5 + conf.lv / 2) * se
-  ci_u <- est + qnorm(0.5 + conf.lv / 2) * se
+  ci_l <- est - qnorm(0.5 + conf_lv / 2) * se
+  ci_u <- est + qnorm(0.5 + conf_lv / 2) * se
   if (est > 0) {
     pval <- 2 * (1 - pnorm(est, 0, se))
   } else {
@@ -47,20 +47,20 @@ bucher <- function(trt, com, conf.lv = 0.95) {
 #' Report-friendly output format for resutl from Bucher's method
 #'
 #' @param output output from \code{\link{bucher}} function
-#' @param ci.digits an integer, number of decimal places for point estimate and derived confidence limits
-#' @param pval.digits an integer, number of decimal places to display Z-test p-value
+#' @param ci_digits an integer, number of decimal places for point estimate and derived confidence limits
+#' @param pval_digits an integer, number of decimal places to display Z-test p-value
 #'
-#' @return a character vector of two elements, first element inf format of 'est (ci_l; ci_u)', second element is Z-test p-value, rounded according to \code{pval.digits}
+#' @return a character vector of two elements, first element inf format of 'est (ci_l; ci_u)', second element is Z-test p-value, rounded according to \code{pval_digits}
 
-print.bucher <- function(output, ci.digits = 2, pval.digits = 3) {
+print_bucher <- function(output, ci_digits = 2, pval_digits = 3) {
   res <- paste0(
-    format(round(output$est, ci.digits), nsmall = ci.digits), " (",
-    format(round(output$ci_l, ci.digits), nsmall = ci.digits), ";",
-    format(round(output$ci_u, ci.digits), nsmall = ci.digits), ")"
+    format(round(output$est, ci_digits), nsmall = ci_digits), " (",
+    format(round(output$ci_l, ci_digits), nsmall = ci_digits), ";",
+    format(round(output$ci_u, ci_digits), nsmall = ci_digits), ")"
   )
 
-  disp_pval <- round(output$pval, pval.digits)
-  disp_pval <- ifelse(disp_pval == 0, paste0("<", 1 / (10^pval.digits)), format(output$pval, nsmall = pval.digits))
+  disp_pval <- round(output$pval, pval_digits)
+  disp_pval <- ifelse(disp_pval == 0, paste0("<", 1 / (10^pval_digits)), format(output$pval, nsmall = pval_digits))
 
   c(result = res, pvalue = disp_pval)
 }
diff --git a/R/maic_analysis.R b/R/maic_analysis.R
diff --git a/R/matching.R b/R/matching.R
@@ -8,46 +8,46 @@
 #' and generates weights for each individual in IPD trial that matches the chosen statistics of those effect modifiers in Aggregated Data (AgD) trial.
 #'
 #' @param data a numeric matrix, centered effect modifiers of IPD, no missing value in any cell is allowed
-#' @param centered.colnames a character or numeric vector, column indicators of centered effect modifiers, by default NULL meaning all columns in \code{data} are effect modifiers
+#' @param centered_colnames a character or numeric vector, column indicators of centered effect modifiers, by default NULL meaning all columns in \code{data} are effect modifiers
 #' @param startVal a scalar, the starting value for all coefficients of the propensity score regression
 #' @param method a string, name of the optimization algorithm (see 'method' argument of \code{base::optim()}). The default is "BFGS", other options are "Nelder-Mead", "CG", "L-BFGS-B", "SANN", and "Brent"
 #' @param ... all other arguments from \code{base::optim()}
 #'
 #' @return a list with the following 4 elements,
 #' \describe{
 #'   \item data - a data.frame, includes the input \code{data} with appended column 'weights' and 'scaled_weights'. Scaled weights has a summation to be the number of rows in \code{data} that has no missing value in any of the effect modifiers
-#'   \item centered.colnames - column names of centered effect modifiers in \code{data}
+#'   \item centered_colnames - column names of centered effect modifiers in \code{data}
 #'   \item nr_missing - number of rows in \code{data} that has at least 1 missing value in specified centered effect modifiers
 #'   \item ess - effective sample size, square of sum divided by sum of squares
 #'   \item opt - R object returned by \code{base::optim()}, for assess convergence and other details
 #' }
 #' @export
 #'
 #' @examples
-estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, method = "BFGS", ...) {
+estimate_weights <- function(data, centered_colnames = NULL, startVal = 0, method = "BFGS", ...) {
   # pre check
   ch1 <- is.data.frame(data)
   if(!ch1) stop("'data' is not a data.frame")
 
-  ch2 <- (!is.null(centered.colnames))
-  if(ch2 & is.numeric(centered.colnames)){
-    ch2b <- any(centered.colnames<1|centered.colnames>ncol(data))
-    if(ch2b) stop("specified centered.colnames are out of bound")
-  }else if(ch2 & is.character(centered.colnames)){
-    ch2b <- !all(centered.colnames%in%names(data))
-    if(ch2b) stop("1 or more specified centered.colnames are not found in 'data'")
+  ch2 <- (!is.null(centered_colnames))
+  if(ch2 & is.numeric(centered_colnames)){
+    ch2b <- any(centered_colnames<1|centered_colnames>ncol(data))
+    if(ch2b) stop("specified centered_colnames are out of bound")
+  }else if(ch2 & is.character(centered_colnames)){
+    ch2b <- !all(centered_colnames%in%names(data))
+    if(ch2b) stop("1 or more specified centered_colnames are not found in 'data'")
   }else{
-    stop("'centered.colnames' should be either a numeric or character vector")
+    stop("'centered_colnames' should be either a numeric or character vector")
   }
 
-  ch3 <- sapply(1:length(centered.colnames), function(ii){
-     !is.numeric(data[,centered.colnames[ii]])
+  ch3 <- sapply(1:length(centered_colnames), function(ii){
+     !is.numeric(data[,centered_colnames[ii]])
   })
   if(any(ch3)) stop(paste0("following columns of 'data' are not numeric for the calculation:",paste(which(ch3),collapse = ",")))
 
   # prepare data for optimization
-  if(is.null(centered.colnames)) centered.colnames <- 1:ncol(data)
-  EM <- data[ , centered.colnames, drop=FALSE]
+  if(is.null(centered_colnames)) centered_colnames <- 1:ncol(data)
+  EM <- data[ , centered_colnames, drop=FALSE]
   ind <- apply(EM,1,function(xx) any(is.na(xx)))
   nr_missing <- sum(ind)
   EM <- as.matrix(EM[!ind,,drop=FALSE])
@@ -79,12 +79,12 @@ estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, metho
   data$scaled_weights <- NA
   data$scaled_weights[!ind] <- wt_rs
 
-  if(is.numeric(centered.colnames)) centered.colnames <- names(data)[centered.colnames]
+  if(is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames]
 
   # Output
   list(
     data = data,
-    centered.colnames = centered.colnames,
+    centered_colnames = centered_colnames,
     nr_missing = nr_missing,
     ess = sum(wt)^2 / sum(wt^2),
     opt = opt1
@@ -99,11 +99,11 @@ estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, metho
 #'
 #'
 #' @param wt a numeric vector of individual MAIC weights (derived use in \code{\link{cal_weights}})
-#' @param main.title a character string, main title of the plot
+#' @param main_title a character string, main title of the plot
 #'
 #' @return a plot
 #' @export
-plot.weights <- function(wt, main.title = "Unscaled Individual Weigths") {
+plot_weights <- function(wt, main_title = "Unscaled Individual Weigths") {
 
   # calculate sample size and exclude NA from wt
   nr_na <- sum(is.na(wt))
@@ -129,7 +129,7 @@ plot.weights <- function(wt, main.title = "Unscaled Individual Weigths") {
 
   # plot
   par(mfrow = c(1, 1), family = "HersheySerif", mgp = c(2.3, 0.5, 0), cex.axis = 0.9, cex.lab = 0.95, bty = "n")
-  hist(wt, border = "white", col = "#6ECEB2", main = main.title, breaks = 20, yaxt = "n", xlab = "")
+  hist(wt, border = "white", col = "#6ECEB2", main = main_title, breaks = 20, yaxt = "n", xlab = "")
   axis(2, las = 1)
   abline(v = median(wt), lty = 2, col = "#688CE8", lwd = 2)
   legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend)

diff --git a/R/process_data.R b/R/process_data.R
@@ -4,34 +4,34 @@
 
 #' Process and Check Aggregated data format
 #'
-#' @param raw.agd
+#' @param raw_agd
 #'
 #' @return
 #' @export
-process.agd <- function(raw.agd) {
+process_agd <- function(raw_agd) {
 
-  raw.agd <- as.data.frame(raw.agd)
+  raw_agd <- as.data.frame(raw_agd)
   # make all column names to be capital, avoid different style
-  names(raw.agd) <- toupper(names(raw.agd))
+  names(raw_agd) <- toupper(names(raw_agd))
 
   # regulaized column name patterns
   must_exist <- c("STUDY","ARM", "N")
   legal_suffix <- c("MEAN","MEDIAN","SD","COUNT")
 
   # swap "TREATMENT" column to "ARM", if applicable
-  if("TREATMENT"%in%names(raw.agd) & (!"ARM"%in%names(raw.agd))){
-    raw.agd$ARM <- raw.agd$TREATMENT
-    raw.agd <- raw.agd[,names(raw.agd)!="TREATMENT"]
+  if("TREATMENT"%in%names(raw_agd) & (!"ARM"%in%names(raw_agd))){
+    raw_agd$ARM <- raw_agd$TREATMENT
+    raw_agd <- raw_agd[,names(raw_agd)!="TREATMENT"]
     warning("'TREATMENT' column is renamed as 'ARM'")
   }
 
   # check: must exist
-  if(!all(must_exist%in%names(raw.agd))){
-    stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw.agd!")
+  if(!all(must_exist%in%names(raw_agd))){
+    stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw_agd!")
   }
 
   # check: legal suffix
-  other_colnames <- setdiff(names(raw.agd),must_exist)
+  other_colnames <- setdiff(names(raw_agd),must_exist)
   ind1 <- grepl("_",other_colnames,fixed=TRUE)
   ind2 <- sapply(other_colnames,function(xx){
       tmp <- unlist(strsplit(xx,split="_"))
@@ -40,7 +40,7 @@ process.agd <- function(raw.agd) {
   ind2 <- (ind2%in%legal_suffix)
 
   use_cols <- other_colnames[ind1&ind2]
-  use_agd <- raw.agd[,c(must_exist,use_cols),drop=FALSE]
+  use_agd <- raw_agd[,c(must_exist,use_cols),drop=FALSE]
   if(!all(other_colnames%in%use_cols)){
     warning(paste0("following columns are ignored since it does not following the naming convention:",
                    paste(setdiff(other_colnames,use_cols),collapse = ",")))
@@ -66,26 +66,26 @@ process.agd <- function(raw.agd) {
 
 #' Process Individual Patient data to dummize categorical effective modifiers
 #'
-#' @param raw.ipd
-#' @param dummize.cols
-#' @param dummize.ref.level
+#' @param raw_ipd
+#' @param dummize_cols
+#' @param dummize_ref_level
 #'
 #' @return
 #' @export
 
-process.ipd <- function(raw.ipd, dummize.cols, dummize.ref.level){
-   for(i in 1:length(dummize.cols)){
-     yy <- raw.ipd[[ dummize.cols[i] ]]
+process_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level){
+   for(i in 1:length(dummize_cols)){
+     yy <- raw_ipd[[ dummize_cols[i] ]]
      yy_levels <- na.omit(unique(yy))
-     yy <- factor( as.character(yy), levels=c(dummize.ref.level[i], setdiff(yy_levels,dummize.ref.level[i])) )
+     yy <- factor( as.character(yy), levels=c(dummize_ref_level[i], setdiff(yy_levels,dummize_ref_level[i])) )
      new_yy <- sapply(levels(yy)[-1],function(j){
                    as.numeric(yy == j)
                })
      new_yy <- as.data.frame(new_yy)
-     names(new_yy) <- toupper(paste(dummize.cols[i],levels(yy)[-1], sep="_"))
-     raw.ipd <- cbind(raw.ipd,new_yy)
+     names(new_yy) <- toupper(paste(dummize_cols[i],levels(yy)[-1], sep="_"))
+     raw_ipd <- cbind(raw_ipd,new_yy)
    }
-   raw.ipd
+   raw_ipd
 }
 
 
@@ -96,7 +96,7 @@ process.ipd <- function(raw.ipd, dummize.cols, dummize.ref.level){
 #'
 #' @return
 #' @export
-center.ipd <- function(ipd,agd){
+center_ipd <- function(ipd,agd){
   # regulaized column name patterns
   must_exist <- c("STUDY","ARM", "N")
   legal_suffix <- c("MEAN","MEDIAN","SD","PROP")
@@ -180,41 +180,41 @@ ext_tte_transfer <- function(dd, time_scale = "month", trt = NULL) {
 #' @param use_agd
 #'
 #' @return
-complete.agd <- function(use.agd) {
-  use.agd <- as.data.frame(use.agd)
-  use.agd <- with(use.agd, {use.agd[tolower(ARM)!="total",,drop=FALSE]})
+complete_agd <- function(use_agd) {
+  use_agd <- as.data.frame(use_agd)
+  use_agd <- with(use_agd, {use_agd[tolower(ARM)!="total",,drop=FALSE]})
 
-  if(nrow(use.agd)<2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")
+  if(nrow(use_agd)<2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")
 
-  rowId <- nrow(use.agd)+1
-  use.agd[rowId, ] <- NA
-  use.agd$STUDY[rowId] <-   use.agd$STUDY[1]
-  use.agd$ARM[rowId] <- "total"
+  rowId <- nrow(use_agd)+1
+  use_agd[rowId, ] <- NA
+  use_agd$STUDY[rowId] <-   use_agd$STUDY[1]
+  use_agd$ARM[rowId] <- "total"
 
   # complete N and count
-  NN <- use.agd$N[rowId] <- sum(use.agd$N, na.rm=TRUE)
-  nn <- use.agd$N[-rowId]
-  for(i in grep("_COUNT$",names(use.agd),value=TRUE)){
-     use.agd[[i]][rowId] <- sum(use.agd[[i]], na.rm=TRUE)
+  NN <- use_agd$N[rowId] <- sum(use_agd$N, na.rm=TRUE)
+  nn <- use_agd$N[-rowId]
+  for(i in grep("_COUNT$",names(use_agd),value=TRUE)){
+     use_agd[[i]][rowId] <- sum(use_agd[[i]], na.rm=TRUE)
   }
 
   # complete MEAN
-  for(i in grep("_MEAN$",names(use.agd),value=TRUE)){
-    use.agd[[i]][rowId] <- sum(use.agd[[i]]*nn)/NN
+  for(i in grep("_MEAN$",names(use_agd),value=TRUE)){
+    use_agd[[i]][rowId] <- sum(use_agd[[i]]*nn)/NN
   }
 
   # complete SD
-  for(i in grep("_SD$",names(use.agd),value=TRUE)){
-    use.agd[[i]][rowId] <- sqrt( sum(use.agd[[i]]^2*(nn-1))/(N-1) )
+  for(i in grep("_SD$",names(use_agd),value=TRUE)){
+    use_agd[[i]][rowId] <- sqrt( sum(use_agd[[i]]^2*(nn-1))/(N-1) )
   }
 
   # complete MEDIAN, approximately!!
-  for(i in grep("_MEDIAN$",names(use.agd),value=TRUE)){
-    use.agd[[i]][rowId] <- mean(use.agd[[i]][-rowId])
+  for(i in grep("_MEDIAN$",names(use_agd),value=TRUE)){
+    use_agd[[i]][rowId] <- mean(use_agd[[i]][-rowId])
   }
 
   # output
-  use.agd
+  use_agd
 }