Skip to content

Commit

Permalink
update for names of R functions to with underscores
Browse files Browse the repository at this point in the history
  • Loading branch information
hoppanda committed Jun 20, 2023
1 parent 12fa4bb commit 567b256
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 514 deletions.
30 changes: 15 additions & 15 deletions R/bucher.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,24 @@
#'
#' @param trt a list with two named scalars for the study with interested experimental arm, one named 'est' for the point estimate, and the other named 'se' for the standard error
#' @param com same as \code{trt}, but for the study with interested control arm
#' @param conf.lv a numerical scalar, prescribe confidence level to derive two-sided confidence interval for the adjusted treatment effect
#' @param conf_lv a numerical scalar, prescribe confidence level to derive two-sided confidence interval for the adjusted treatment effect
#'
#' @return a list with 5 elements,
#' \describe{
#' \item est - a scalar, point estimate of the adjusted treatment effect
#' \item se - a scalar, standard error of the adjusted treatment effect (i.e. \code{est} in return)
#' \item ci_l - a scalor, lower confidence limit of a two-sided CI with prescribed nominal level by \code{conf.lv}
#' \item ci_u - a scalor, upper confidence limit of a two-sided CI with prescribed nominal level by \code{conf.lv}
#' \item ci_l - a scalor, lower confidence limit of a two-sided CI with prescribed nominal level by \code{conf_lv}
#' \item ci_u - a scalor, upper confidence limit of a two-sided CI with prescribed nominal level by \code{conf_lv}
#' \item pval - p-value of Z-test, with null hypothesis that \code{est} is zero
#' }
#' @export
#'
#' @examples
bucher <- function(trt, com, conf.lv = 0.95) {
bucher <- function(trt, com, conf_lv = 0.95) {
est <- trt$est - com$est
se <- sqrt(trt$se^2 + com$se^2)
ci_l <- est - qnorm(0.5 + conf.lv / 2) * se
ci_u <- est + qnorm(0.5 + conf.lv / 2) * se
ci_l <- est - qnorm(0.5 + conf_lv / 2) * se
ci_u <- est + qnorm(0.5 + conf_lv / 2) * se
if (est > 0) {
pval <- 2 * (1 - pnorm(est, 0, se))
} else {
Expand All @@ -47,20 +47,20 @@ bucher <- function(trt, com, conf.lv = 0.95) {
#' Report-friendly output format for resutl from Bucher's method
#'
#' @param output output from \code{\link{bucher}} function
#' @param ci.digits an integer, number of decimal places for point estimate and derived confidence limits
#' @param pval.digits an integer, number of decimal places to display Z-test p-value
#' @param ci_digits an integer, number of decimal places for point estimate and derived confidence limits
#' @param pval_digits an integer, number of decimal places to display Z-test p-value
#'
#' @return a character vector of two elements, first element inf format of 'est (ci_l; ci_u)', second element is Z-test p-value, rounded according to \code{pval.digits}
#' @return a character vector of two elements, first element inf format of 'est (ci_l; ci_u)', second element is Z-test p-value, rounded according to \code{pval_digits}

print.bucher <- function(output, ci.digits = 2, pval.digits = 3) {
print_bucher <- function(output, ci_digits = 2, pval_digits = 3) {
res <- paste0(
format(round(output$est, ci.digits), nsmall = ci.digits), " (",
format(round(output$ci_l, ci.digits), nsmall = ci.digits), ";",
format(round(output$ci_u, ci.digits), nsmall = ci.digits), ")"
format(round(output$est, ci_digits), nsmall = ci_digits), " (",
format(round(output$ci_l, ci_digits), nsmall = ci_digits), ";",
format(round(output$ci_u, ci_digits), nsmall = ci_digits), ")"
)

disp_pval <- round(output$pval, pval.digits)
disp_pval <- ifelse(disp_pval == 0, paste0("<", 1 / (10^pval.digits)), format(output$pval, nsmall = pval.digits))
disp_pval <- round(output$pval, pval_digits)
disp_pval <- ifelse(disp_pval == 0, paste0("<", 1 / (10^pval_digits)), format(output$pval, nsmall = pval_digits))

c(result = res, pvalue = disp_pval)
}
37 changes: 0 additions & 37 deletions R/maic_analysis.R

This file was deleted.

40 changes: 20 additions & 20 deletions R/matching.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,46 @@
#' and generates weights for each individual in IPD trial that matches the chosen statistics of those effect modifiers in Aggregated Data (AgD) trial.
#'
#' @param data a numeric matrix, centered effect modifiers of IPD, no missing value in any cell is allowed
#' @param centered.colnames a character or numeric vector, column indicators of centered effect modifiers, by default NULL meaning all columns in \code{data} are effect modifiers
#' @param centered_colnames a character or numeric vector, column indicators of centered effect modifiers, by default NULL meaning all columns in \code{data} are effect modifiers
#' @param startVal a scalar, the starting value for all coefficients of the propensity score regression
#' @param method a string, name of the optimization algorithm (see 'method' argument of \code{base::optim()}). The default is "BFGS", other options are "Nelder-Mead", "CG", "L-BFGS-B", "SANN", and "Brent"
#' @param ... all other arguments from \code{base::optim()}
#'
#' @return a list with the following 4 elements,
#' \describe{
#' \item data - a data.frame, includes the input \code{data} with appended column 'weights' and 'scaled_weights'. Scaled weights has a summation to be the number of rows in \code{data} that has no missing value in any of the effect modifiers
#' \item centered.colnames - column names of centered effect modifiers in \code{data}
#' \item centered_colnames - column names of centered effect modifiers in \code{data}
#' \item nr_missing - number of rows in \code{data} that has at least 1 missing value in specified centered effect modifiers
#' \item ess - effective sample size, square of sum divided by sum of squares
#' \item opt - R object returned by \code{base::optim()}, for assess convergence and other details
#' }
#' @export
#'
#' @examples
estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, method = "BFGS", ...) {
estimate_weights <- function(data, centered_colnames = NULL, startVal = 0, method = "BFGS", ...) {
# pre check
ch1 <- is.data.frame(data)
if(!ch1) stop("'data' is not a data.frame")

ch2 <- (!is.null(centered.colnames))
if(ch2 & is.numeric(centered.colnames)){
ch2b <- any(centered.colnames<1|centered.colnames>ncol(data))
if(ch2b) stop("specified centered.colnames are out of bound")
}else if(ch2 & is.character(centered.colnames)){
ch2b <- !all(centered.colnames%in%names(data))
if(ch2b) stop("1 or more specified centered.colnames are not found in 'data'")
ch2 <- (!is.null(centered_colnames))
if(ch2 & is.numeric(centered_colnames)){
ch2b <- any(centered_colnames<1|centered_colnames>ncol(data))
if(ch2b) stop("specified centered_colnames are out of bound")
}else if(ch2 & is.character(centered_colnames)){
ch2b <- !all(centered_colnames%in%names(data))
if(ch2b) stop("1 or more specified centered_colnames are not found in 'data'")
}else{
stop("'centered.colnames' should be either a numeric or character vector")
stop("'centered_colnames' should be either a numeric or character vector")
}

ch3 <- sapply(1:length(centered.colnames), function(ii){
!is.numeric(data[,centered.colnames[ii]])
ch3 <- sapply(1:length(centered_colnames), function(ii){
!is.numeric(data[,centered_colnames[ii]])
})
if(any(ch3)) stop(paste0("following columns of 'data' are not numeric for the calculation:",paste(which(ch3),collapse = ",")))

# prepare data for optimization
if(is.null(centered.colnames)) centered.colnames <- 1:ncol(data)
EM <- data[ , centered.colnames, drop=FALSE]
if(is.null(centered_colnames)) centered_colnames <- 1:ncol(data)
EM <- data[ , centered_colnames, drop=FALSE]
ind <- apply(EM,1,function(xx) any(is.na(xx)))
nr_missing <- sum(ind)
EM <- as.matrix(EM[!ind,,drop=FALSE])
Expand Down Expand Up @@ -79,12 +79,12 @@ estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, metho
data$scaled_weights <- NA
data$scaled_weights[!ind] <- wt_rs

if(is.numeric(centered.colnames)) centered.colnames <- names(data)[centered.colnames]
if(is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames]

# Output
list(
data = data,
centered.colnames = centered.colnames,
centered_colnames = centered_colnames,
nr_missing = nr_missing,
ess = sum(wt)^2 / sum(wt^2),
opt = opt1
Expand All @@ -99,11 +99,11 @@ estimate.weights <- function(data, centered.colnames = NULL, startVal = 0, metho
#'
#'
#' @param wt a numeric vector of individual MAIC weights (derived use in \code{\link{cal_weights}})
#' @param main.title a character string, main title of the plot
#' @param main_title a character string, main title of the plot
#'
#' @return a plot
#' @export
plot.weights <- function(wt, main.title = "Unscaled Individual Weigths") {
plot_weights <- function(wt, main_title = "Unscaled Individual Weigths") {

# calculate sample size and exclude NA from wt
nr_na <- sum(is.na(wt))
Expand All @@ -129,7 +129,7 @@ plot.weights <- function(wt, main.title = "Unscaled Individual Weigths") {

# plot
par(mfrow = c(1, 1), family = "HersheySerif", mgp = c(2.3, 0.5, 0), cex.axis = 0.9, cex.lab = 0.95, bty = "n")
hist(wt, border = "white", col = "#6ECEB2", main = main.title, breaks = 20, yaxt = "n", xlab = "")
hist(wt, border = "white", col = "#6ECEB2", main = main_title, breaks = 20, yaxt = "n", xlab = "")
axis(2, las = 1)
abline(v = median(wt), lty = 2, col = "#688CE8", lwd = 2)
legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend)
Expand Down
82 changes: 41 additions & 41 deletions R/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,34 @@

#' Process and Check Aggregated data format
#'
#' @param raw.agd
#' @param raw_agd
#'
#' @return
#' @export
process.agd <- function(raw.agd) {
process_agd <- function(raw_agd) {

raw.agd <- as.data.frame(raw.agd)
raw_agd <- as.data.frame(raw_agd)
# make all column names to be capital, avoid different style
names(raw.agd) <- toupper(names(raw.agd))
names(raw_agd) <- toupper(names(raw_agd))

# regulaized column name patterns
must_exist <- c("STUDY","ARM", "N")
legal_suffix <- c("MEAN","MEDIAN","SD","COUNT")

# swap "TREATMENT" column to "ARM", if applicable
if("TREATMENT"%in%names(raw.agd) & (!"ARM"%in%names(raw.agd))){
raw.agd$ARM <- raw.agd$TREATMENT
raw.agd <- raw.agd[,names(raw.agd)!="TREATMENT"]
if("TREATMENT"%in%names(raw_agd) & (!"ARM"%in%names(raw_agd))){
raw_agd$ARM <- raw_agd$TREATMENT
raw_agd <- raw_agd[,names(raw_agd)!="TREATMENT"]
warning("'TREATMENT' column is renamed as 'ARM'")
}

# check: must exist
if(!all(must_exist%in%names(raw.agd))){
stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw.agd!")
if(!all(must_exist%in%names(raw_agd))){
stop("At least 1 of the must-exists columns (STUDY, ARM, N) cannot be found in raw_agd!")
}

# check: legal suffix
other_colnames <- setdiff(names(raw.agd),must_exist)
other_colnames <- setdiff(names(raw_agd),must_exist)
ind1 <- grepl("_",other_colnames,fixed=TRUE)
ind2 <- sapply(other_colnames,function(xx){
tmp <- unlist(strsplit(xx,split="_"))
Expand All @@ -40,7 +40,7 @@ process.agd <- function(raw.agd) {
ind2 <- (ind2%in%legal_suffix)

use_cols <- other_colnames[ind1&ind2]
use_agd <- raw.agd[,c(must_exist,use_cols),drop=FALSE]
use_agd <- raw_agd[,c(must_exist,use_cols),drop=FALSE]
if(!all(other_colnames%in%use_cols)){
warning(paste0("following columns are ignored since it does not following the naming convention:",
paste(setdiff(other_colnames,use_cols),collapse = ",")))
Expand All @@ -66,26 +66,26 @@ process.agd <- function(raw.agd) {

#' Process Individual Patient data to dummize categorical effective modifiers
#'
#' @param raw.ipd
#' @param dummize.cols
#' @param dummize.ref.level
#' @param raw_ipd
#' @param dummize_cols
#' @param dummize_ref_level
#'
#' @return
#' @export

process.ipd <- function(raw.ipd, dummize.cols, dummize.ref.level){
for(i in 1:length(dummize.cols)){
yy <- raw.ipd[[ dummize.cols[i] ]]
process_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level){
for(i in 1:length(dummize_cols)){
yy <- raw_ipd[[ dummize_cols[i] ]]
yy_levels <- na.omit(unique(yy))
yy <- factor( as.character(yy), levels=c(dummize.ref.level[i], setdiff(yy_levels,dummize.ref.level[i])) )
yy <- factor( as.character(yy), levels=c(dummize_ref_level[i], setdiff(yy_levels,dummize_ref_level[i])) )
new_yy <- sapply(levels(yy)[-1],function(j){
as.numeric(yy == j)
})
new_yy <- as.data.frame(new_yy)
names(new_yy) <- toupper(paste(dummize.cols[i],levels(yy)[-1], sep="_"))
raw.ipd <- cbind(raw.ipd,new_yy)
names(new_yy) <- toupper(paste(dummize_cols[i],levels(yy)[-1], sep="_"))
raw_ipd <- cbind(raw_ipd,new_yy)
}
raw.ipd
raw_ipd
}


Expand All @@ -96,7 +96,7 @@ process.ipd <- function(raw.ipd, dummize.cols, dummize.ref.level){
#'
#' @return
#' @export
center.ipd <- function(ipd,agd){
center_ipd <- function(ipd,agd){
# regulaized column name patterns
must_exist <- c("STUDY","ARM", "N")
legal_suffix <- c("MEAN","MEDIAN","SD","PROP")
Expand Down Expand Up @@ -180,41 +180,41 @@ ext_tte_transfer <- function(dd, time_scale = "month", trt = NULL) {
#' @param use_agd
#'
#' @return
complete.agd <- function(use.agd) {
use.agd <- as.data.frame(use.agd)
use.agd <- with(use.agd, {use.agd[tolower(ARM)!="total",,drop=FALSE]})
complete_agd <- function(use_agd) {
use_agd <- as.data.frame(use_agd)
use_agd <- with(use_agd, {use_agd[tolower(ARM)!="total",,drop=FALSE]})

if(nrow(use.agd)<2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")
if(nrow(use_agd)<2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")

rowId <- nrow(use.agd)+1
use.agd[rowId, ] <- NA
use.agd$STUDY[rowId] <- use.agd$STUDY[1]
use.agd$ARM[rowId] <- "total"
rowId <- nrow(use_agd)+1
use_agd[rowId, ] <- NA
use_agd$STUDY[rowId] <- use_agd$STUDY[1]
use_agd$ARM[rowId] <- "total"

# complete N and count
NN <- use.agd$N[rowId] <- sum(use.agd$N, na.rm=TRUE)
nn <- use.agd$N[-rowId]
for(i in grep("_COUNT$",names(use.agd),value=TRUE)){
use.agd[[i]][rowId] <- sum(use.agd[[i]], na.rm=TRUE)
NN <- use_agd$N[rowId] <- sum(use_agd$N, na.rm=TRUE)
nn <- use_agd$N[-rowId]
for(i in grep("_COUNT$",names(use_agd),value=TRUE)){
use_agd[[i]][rowId] <- sum(use_agd[[i]], na.rm=TRUE)
}

# complete MEAN
for(i in grep("_MEAN$",names(use.agd),value=TRUE)){
use.agd[[i]][rowId] <- sum(use.agd[[i]]*nn)/NN
for(i in grep("_MEAN$",names(use_agd),value=TRUE)){
use_agd[[i]][rowId] <- sum(use_agd[[i]]*nn)/NN
}

# complete SD
for(i in grep("_SD$",names(use.agd),value=TRUE)){
use.agd[[i]][rowId] <- sqrt( sum(use.agd[[i]]^2*(nn-1))/(N-1) )
for(i in grep("_SD$",names(use_agd),value=TRUE)){
use_agd[[i]][rowId] <- sqrt( sum(use_agd[[i]]^2*(nn-1))/(N-1) )
}

# complete MEDIAN, approximately!!
for(i in grep("_MEDIAN$",names(use.agd),value=TRUE)){
use.agd[[i]][rowId] <- mean(use.agd[[i]][-rowId])
for(i in grep("_MEDIAN$",names(use_agd),value=TRUE)){
use_agd[[i]][rowId] <- mean(use_agd[[i]][-rowId])
}

# output
use.agd
use_agd
}


Loading

0 comments on commit 567b256

Please sign in to comment.