Cleaning and organization

ngreifer · Nov 12, 2024 · 9282bb9 · 9282bb9
1 parent b04a795
commit 9282bb9
Show file tree

Hide file tree

Showing 19 changed files with 152 additions and 148 deletions.
diff --git a/R/aux_functions.R b/R/aux_functions.R
@@ -71,7 +71,7 @@ info.to.method <- function(info) {
     else "without replacement"
   }
 
-  firstup(do.call("paste", c(unname(out.list), list(sep = " "))))
+  firstup(do.call("paste", unname(out.list)))
 }
 
 info.to.distance <- function(info) {
@@ -153,11 +153,9 @@ exactify <- function(X, nam = NULL, sep = "|", include_vars = FALSE, justify = "
     }
 
     lev <- {
-      if (include_vars) {
-        sprintf("%s = %s",
-                names(X)[i],
-                add_quotes(unique_x, is.character(X[[i]]) || is.factor(X[[i]])))
-      }
+      if (include_vars) sprintf("%s = %s",
+                                names(X)[i],
+                                add_quotes(unique_x, chk::vld_character_or_factor(X[[i]])))
       else if (is_null(justify)) unique_x
       else format(unique_x, justify = justify)
     }
@@ -190,7 +188,9 @@ get.covs.matrix <- function(formula = NULL, data = NULL) {
                     na.action = na.pass)
 
   chars.in.mf <- vapply(mf, is.character, logical(1L))
-  mf[chars.in.mf] <- lapply(mf[chars.in.mf], as.factor)
+  for (i in which(chars.in.mf)) {
+    mf[[i]] <- as.factor(mf[[i]])
+  }
 
   mf <- droplevels(mf)
 

diff --git a/R/discard.R b/R/discard.R
@@ -4,7 +4,7 @@ discard <- function(treat, pscore = NULL, option = NULL) {
 
   if (is_null(option)){
     # keep all units
-    return(setNames(rep(FALSE, n.obs), names(treat)))
+    return(rep_with(FALSE, treat))
   }
 
   if (is.logical(option) && length(option) == n.obs && !anyNA(option)) {
@@ -20,7 +20,7 @@ discard <- function(treat, pscore = NULL, option = NULL) {
 
   if (option == "none") {
     # keep all units
-    return(setNames(rep(FALSE, n.obs), names(treat)))
+    return(rep_with(FALSE, treat))
   }
 
   if (is_null(pscore)) {
@@ -50,7 +50,7 @@ discard <- function(treat, pscore = NULL, option = NULL) {
   #   X <- model.matrix(reformulate(names(covs), intercept = FALSE), data = covs,
   #                     contrasts.arg = lapply(Filter(is.factor, covs),
   #                                            function(x) contrasts(x, contrasts = nlevels(x) == 1)))
-  #   discarded <- rep(FALSE, n.obs)
+  #   discarded <- rep.int(FALSE, n.obs)
   #   if (option == "hull.control"){ # discard units not in T convex hull
   #     wif <- WhatIf::whatif(cfact = X[treat==0,], data = X[treat==1,])
   #     discarded[treat==0] <- !wif$in.hull

diff --git a/R/get_weights_from_subclass.R b/R/get_weights_from_subclass.R
@@ -16,7 +16,7 @@ get_weights_from_subclass <- function(psclass, treat, estimand = "ATT") {
     .err("No control units were matched")
   }
 
-  weights <- setNames(rep(0.0, length(treat)), names(treat))
+  weights <- rep_with(0.0, treat)
 
   if (!is.factor(psclass)) {
     psclass <- factor(psclass, nmax = min(length(i1), length(i0)))

diff --git a/R/input_processing.R b/R/input_processing.R
@@ -441,7 +441,7 @@ process.caliper <- function(caliper = NULL, method = NULL, data = NULL, covs = N
   #Check std.caliper
   chk::chk_logical(std.caliper)
   if (length(std.caliper) == 1L) {
-    std.caliper <- setNames(rep.int(std.caliper, length(caliper)), names(caliper))
+    std.caliper <- rep_with(std.caliper, caliper)
   }
   else if (length(std.caliper) == length(caliper)) {
     names(std.caliper) <- names(caliper)
@@ -464,7 +464,7 @@ process.caliper <- function(caliper = NULL, method = NULL, data = NULL, covs = N
     else if (cal.in.covs[x]) var <- covs[[x]]
     else var <- mahcovs[[x]]
 
-    is.factor(var) || is.character(var)
+    chk::vld_character_or_factor(var)
   }, logical(1L))
 
   if (any(cat.vars)) {

diff --git a/R/match.data.R b/R/match.data.R
@@ -305,9 +305,12 @@ get_matches <- function(object, distance = "distance", weights = "weights", subc
   matched <- as.data.frame(matrix(NA_character_, nrow = nrow(mm) + sum(!is.na(mm)), ncol = 3))
   names(matched) <- c(id, subclass, weights)
 
-  matched[[id]] <- c(as.vector(tmm[!is.na(tmm)]), rownames(mm))
-  matched[[subclass]] <- c(as.vector(col(tmm)[!is.na(tmm)]), seq_len(nrow(mm)))
-  matched[[weights]] <- c(1/num.matches[matched[[subclass]][seq_len(sum(!is.na(mm)))]], rep(1, nrow(mm)))
+  matched[[id]] <- c(as.vector(tmm[!is.na(tmm)]),
+                     rownames(mm))
+  matched[[subclass]] <- c(as.vector(col(tmm)[!is.na(tmm)]),
+                           seq_len(nrow(mm)))
+  matched[[weights]] <- c(1 / num.matches[matched[[subclass]][seq_len(sum(!is.na(mm)))]],
+                          rep.int(1, nrow(mm)))
 
   if (is_not_null(object$s.weights) && include.s.weights) {
     matched[[weights]] <- matched[[weights]] * object$s.weights[matched[[id]]]

diff --git a/R/match.qoi.R b/R/match.qoi.R
@@ -6,7 +6,7 @@ bal1var <- function(xx, tt, ww = NULL, s.weights, subclass = NULL, mm = NULL,
   un <- is_null(ww)
   bin.var <- all(xx == 0 | xx == 1)
 
-  xsum <- rep(NA_real_, 7)
+  xsum <- rep.int(NA_real_, 7L)
   if (standardize)
     names(xsum) <- c("Means Treated","Means Control", "Std. Mean Diff.",
                      "Var. Ratio", "eCDF Mean", "eCDF Max", "Std. Pair Dist.")
@@ -170,7 +170,7 @@ qqsum <- function(x, t, w = NULL, standardize = FALSE) {
   n.obs <- length(x)
 
   if (is_null(w)) {
-    w <- rep(1, n.obs)
+    w <- rep.int(1, n.obs)
   }
 
   if (has_n_unique(x, 2) && all(x == 0 | x == 1)) {

diff --git a/R/matchit.R b/R/matchit.R
@@ -49,8 +49,7 @@
 #' argument controlling the link function used in estimating the distance
 #' measure. Allowable options depend on the specific `distance` value
 #' specified. See [`distance`] for allowable options with each
-#' option. The default is `"logit"`, which, along with `distance = "glm"`, identifies the default measure as logistic regression propensity
-#' scores.
+#' option. The default is `"logit"`, which, along with `distance = "glm"`, identifies the default measure as logistic regression propensity scores.
 #' @param distance.options a named list containing additional arguments
 #' supplied to the function that estimates the distance measure as determined
 #' by the argument to `distance`. See [`distance`] for an
@@ -70,8 +69,7 @@
 #' within propensity score calipers, where the propensity scores are computed
 #' using `formula` and `distance`. Can be specified as a string
 #' containing the names of variables in `data` to be used or a one-sided
-#' formula with the desired variables on the right-hand side (e.g., `~ X3 + X4`). See the individual methods pages for information on whether and how
-#' this argument is used.
+#' formula with the desired variables on the right-hand side (e.g., `~ X3 + X4`). See the individual methods pages for information on whether and how this argument is used.
 #' @param antiexact for methods that allow it, for which variables anti-exact
 #' matching should take place. Anti-exact matching ensures paired individuals
 #' do not have the same value of the anti-exact matching variable(s). Can be
@@ -246,8 +244,7 @@
 #' treated unit across its matches. For example, if a control unit was matched
 #' to a treated unit that had two other control units matched to it, and that
 #' same control was matched to a treated unit that had one other control unit
-#' matched to it, the control unit in question would get a weight of 1/3 + 1/2
-#' = 5/6. For the ATC, the same is true with the treated and control labels
+#' matched to it, the control unit in question would get a weight of \eqn{1/3 + 1/2 = 5/6}. For the ATC, the same is true with the treated and control labels
 #' switched. The weights are computed using the `match.matrix` component
 #' of the `matchit()` output object.
 #'
@@ -268,7 +265,7 @@
 #' @return When `method` is something other than `"subclass"`, a
 #' `matchit` object with the following components:
 #'
-#' \item{match.matrix}{a matrix containing the matches. The rownames correspond
+#' \item{match.matrix}{a matrix containing the matches. The row names correspond
 #' to the treated units and the values in each row are the names (or indices)
 #' of the control units matched to each treated unit. When treated units are
 #' matched to different numbers of control units (e.g., with variable ratio matching or
@@ -284,30 +281,22 @@
 #' specified as a method of estimating propensity scores. When
 #' `reestimate = TRUE`, this is the model estimated after discarding
 #' units.}
-#' \item{X}{a data frame of covariates mentioned in `formula`,
-#' `exact`, `mahvars`, `caliper`, and `antiexact`.}
+#' \item{X}{a data frame of covariates mentioned in `formula`, `exact`, `mahvars`, `caliper`, and `antiexact`.}
 #' \item{call}{the `matchit()` call.}
-#' \item{info}{information on the matching method and
-#' distance measures used.}
-#' \item{estimand}{the argument supplied to
-#' `estimand`.}
+#' \item{info}{information on the matching method and distance measures used.}
+#' \item{estimand}{the argument supplied to `estimand`.}
 #' \item{formula}{the `formula` supplied.}
 #' \item{treat}{a vector of treatment status converted to zeros (0) and ones
 #' (1) if not already in that format.}
 #' \item{distance}{a vector of distance
 #' values (i.e., propensity scores) when `distance` is supplied as a
 #' method of estimating propensity scores or a numeric vector.}
 #' \item{discarded}{a logical vector denoting whether each observation was
-#' discarded (`TRUE`) or not (`FALSE`) by the argument to
-#' `discard`.}
-#' \item{s.weights}{the vector of sampling weights supplied to
-#' the `s.weights` argument, if any.}
-#' \item{exact}{a one-sided formula
-#' containing the variables, if any, supplied to `exact`.}
-#' \item{mahvars}{a one-sided formula containing the variables, if any,
-#' supplied to `mahvars`.}
-#' \item{obj}{when `include.obj = TRUE`, an
-#' object containing the intermediate results of the matching procedure. See
+#' discarded (`TRUE`) or not (`FALSE`) by the argument to `discard`.}
+#' \item{s.weights}{the vector of sampling weights supplied to the `s.weights` argument, if any.}
+#' \item{exact}{a one-sided formula containing the variables, if any, supplied to `exact`.}
+#' \item{mahvars}{a one-sided formula containing the variables, if any, supplied to `mahvars`.}
+#' \item{obj}{when `include.obj = TRUE`, an object containing the intermediate results of the matching procedure. See
 #' the individual methods pages for what this component will contain.}
 #'
 #' When `method = "subclass"`, a `matchit.subclass` object with the same
@@ -386,7 +375,7 @@
 #'                    discard = "control", subclass = 10)
 #' s.out1
 #' summary(s.out1, un = TRUE)
-#'
+
 #' @export
 matchit <- function(formula,
                     data = NULL,
@@ -429,7 +418,7 @@ matchit <- function(formula,
   }
 
   #Process formula and data inputs
-  if (is_null(formula) || !rlang::is_formula(formula, lhs = TRUE)) {
+  if (!rlang::is_formula(formula, lhs = TRUE)) {
     .err("`formula` must be a formula relating treatment to covariates")
   }
 
@@ -453,8 +442,8 @@ matchit <- function(formula,
                                  reestimate = reestimate, s.weights = s.weights, replace = replace,
                                  ratio = ratio, m.order = m.order, estimand = estimand)
 
-  if (is_not_null(ignored.inputs)) {
-    for (i in ignored.inputs) assign(i, NULL)
+  for (i in ignored.inputs) {
+    assign(i, NULL)
   }
 
   #Process replace
@@ -692,13 +681,15 @@ matchit <- function(formula,
   for (i in X.list.nm) {
     X_tmp <- get0(i, inherits = FALSE)
 
-    if (is_not_null(X_tmp)) {
-      if (is_null(X)) {
-        X <- X_tmp
-      }
-      else if (!all(hasName(X, names(X_tmp)))) {
-        X <- cbind(X, X_tmp[!names(X_tmp) %in% names(X)])
-      }
+    if (is_null(X_tmp)) {
+      next
+    }
+
+    if (is_null(X)) {
+      X <- X_tmp
+    }
+    else if (!all(hasName(X, names(X_tmp)))) {
+      X <- cbind(X, X_tmp[!names(X_tmp) %in% names(X)])
     }
   }