Skip to content

Commit

Permalink
Add external pointer implementation (#31)
Browse files Browse the repository at this point in the history
* Start

* Use Rcpp::XPtr to avoid some data copying

- Yields 2x speedup

* Finish Xptr implementation
  • Loading branch information
alexpghayes authored May 8, 2024
1 parent 8eee280 commit d92eeb9
Show file tree
Hide file tree
Showing 9 changed files with 717 additions and 3 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: fastadi
Title: Self-Tuning Data Adaptive Matrix Imputation
Version: 0.1.1.9000
Version: 0.1.1.9001
Authors@R:
c(person(given = "Alex",
family = "Hayes",
Expand Down Expand Up @@ -50,5 +50,5 @@ LinkingTo:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE, roclets = c ("namespace", "rd"))
RoxygenNote: 7.1.2
RoxygenNote: 7.3.1.9000
Config/testthat/edition: 3
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ S3method(adaptive_initialize,sparseMatrix)
S3method(citation_impute,LRMF)
S3method(citation_impute,default)
S3method(citation_impute,sparseMatrix)
S3method(citation_impute2,LRMF)
S3method(citation_impute2,default)
S3method(citation_impute2,sparseMatrix)
S3method(print,adaptive_imputation)
export(adaptive_impute)
export(adaptive_initialize)
export(citation_impute)
export(citation_impute2)
import(LRMF3)
import(Matrix)
importFrom(LRMF3,dim_and_class)
Expand Down
16 changes: 16 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,19 @@ relative_f_norm_change_impl <- function(new_U, new_d, new_V, U, d, V, num_thread
.Call(`_fastadi_relative_f_norm_change_impl`, new_U, new_d, new_V, U, d, V, num_threads)
}

makeCitationEstimate <- function(citations, U, d, V) {
.Call(`_fastadi_makeCitationEstimate`, citations, U, d, V)
}

left <- function(x, A) {
.Call(`_fastadi_left`, x, A)
}

right <- function(x, A) {
.Call(`_fastadi_right`, x, A)
}

p_omega_z <- function(A) {
.Call(`_fastadi_p_omega_z`, A)
}

273 changes: 273 additions & 0 deletions R/citation-impute2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
#' CitationImpute
#'
#' An implementation of the `AdaptiveImpute` algorithm using efficient
#' sparse matrix computations, specialized for the case when missing
#' values in the upper triangle are taken to be *explicitly observed*
#' zeros, as opposed to missing values. This is primarily
#' useful for spectral decompositions of adjacency matrices of graphs
#' with (near) tree structure, such as citation networks.
#'
#' @details If OpenMP is available, `citation_impute` will automatically
#' use `getOption("Ncpus", 1L)` OpenMP threads to parallelize some
#' key computations. Note that some computations are performed with
#' the Armadillo C++ linear algebra library and may also be parallelized
#' dependent on your BLAS and LAPACK installations and configurations.
#'
#' @param X A *square* sparse matrix of [Matrix::sparseMatrix()] class.
#' Implicit zeros in the upper triangle of this matrix are considered
#' observed and predictions on these elements contribute to the
#' objective function minimized by `AdaptiveImpute`.
#'
#' @inherit adaptive_impute params return
#' @export
#' @include masked_approximation
#'
#' @examples
#'
#' # create a (binary) square sparse matrix to demonstrate on
#'
#' set.seed(887)
#'
#' n <- 1000
#' A <- rsparsematrix(n, n, 0.1, rand.x = NULL) * 1
#' A <- as(triu(A), "generalMatrix")
#'
#' mf <- citation_impute(A, rank = 50, max_iter = 10L, check_interval = NULL)
#' mf
#'
#'
#' mf2 <- citation_impute2(A, rank = 50L, max_iter = 10L, check_interval = NULL)
#' mf2
#'
#'
citation_impute2 <- function(
X,
rank,
...,
initialization = c("svd", "adaptive-initialize", "approximate"),
max_iter = 200L,
check_interval = 1L,
epsilon = 1e-7,
additional = NULL
) {

ellipsis::check_dots_used()

rank <- as.integer(rank)

if (length(rank) > 1)
stop(
"`rank` must be an integer vector with a single element.",
call. = FALSE
)

if (length(max_iter) > 1)
stop(
"`max_iter` must be an integer vector with a single element.",
call. = FALSE
)

if (!is.null(check_interval) && length(check_interval) > 1)
stop(
"`check_interval` must be a single integer, or NULL.",
call. = FALSE
)

if (rank <= 1 || rank >= min(nrow(X), ncol(X)))
stop(
"rank must satisfy 1 < rank < min(nrow(X), ncol(X)).",
call. = FALSE
)

if (max_iter < 1)
stop("`max_iter` must be an integer >= 1L.", call. = FALSE)

if (!is.null(check_interval) && check_interval < 1)
stop("`check_interval` must be an integer >= 1L, or NULL.", call. = FALSE)

UseMethod("citation_impute2")
}

#' @export
citation_impute2.default <- function(
X,
rank,
...,
initialization = c("svd", "adaptive-initialize", "approximate"),
max_iter = 200L,
check_interval = 1L,
epsilon = 1e-7,
additional = NULL) {

stop(
glue("No `citation_impute` method for objects of class {class(X)}."),
call. = FALSE
)
}

#' @export
#' @rdname citation_impute2
citation_impute2.sparseMatrix <- function(
X,
rank,
...,
initialization = c("svd", "adaptive-initialize", "approximate"),
additional = NULL
) {

initialization <- match.arg(initialization)

# *explicitly* observed elements of X

obs_upper <- nnzero(triu(X))
obs_lower <- nnzero(tril(X, k = -1))
obs_total <- nnzero(X)

log_info(
glue(
"Matrix has {obs_total} non-zero elements, {obs_upper} in the upper ",
"triangle (including the diagonal), and {obs_lower} in the strict ",
"lower triangle."
)
)

log_info(glue("Using {initialization} initialization."))

if (initialization == "svd") {
# multiply by one to coerce to type that svds can handle,
# svds doesn't like binary matrices
s <- svds(X * 1, rank)
mf <- as_svd_like(s)
} else if (initialization == "adaptive-initialize") {

# *total* observed elements of X, including entries in the
# upper triangle that are implicitly observed

n <- ncol(X) # recall that X is square
implicit_total <- (n - 1) * (n - 2) / 2 + obs_lower
p_hat <- implicit_total / prod(dim(X))

mf <- adaptive_initialize(X * 1, rank, p_hat = p_hat)
} else if (initialization == "approximate") {

if (is.null(additional))
stop(
"Must specify `additional` when using approximate initialization.",
call. = FALSE
)

# *total* observed elements of X, including entries in the
# upper triangle that are implicitly observed

n <- ncol(X) # recall that X is square
implicit_total <- (n - 1) * (n - 2) / 2 + obs_lower
p_hat <- implicit_total / prod(dim(X))

mf <- adaptive_initialize(
X * 1, rank = rank,
p_hat = p_hat,
alpha_method = "approximate",
additional = additional
)

} else {
stop("This should not happen.", call. = FALSE)
}

log_info("Done initializing.")

citation_impute2.LRMF(mf, X, ...)
}

#' @export
#' @rdname citation_impute2
citation_impute2.LRMF <- function(
X,
rank,
...,
epsilon = 1e-7,
max_iter = 200L,
check_interval = 1L
) {

log_info(glue("Beginning AdaptiveImpute (max {max_iter} iterations)."))

if (!is.null(check_interval))
log_info(glue("Checking convergence every {check_interval} iteration(s)."))

# first argument is the svd_like object, second is the data
# do some renaming here

s <- X
X <- rank
rank <- s$rank

### ITERATION STAGE

delta <- Inf
d <- ncol(X)
norm_X <- norm(X, type = "F")^2
iter <- 1L

while (delta > epsilon) {

# update s: lines 4 and 5
# take the SVD of M-tilde

m <- makeCitationEstimate(X, s$u, s$d, s$v)

s_new <- svds(
left,
k = rank,
Atrans = right,
dim = dim(X),
args = m
)

X_tilde_f_norm <- norm_X + sum(s$d^2) -
p_omega_z(m)

alpha <- (X_tilde_f_norm - sum(s_new$d^2)) / (d - rank) # line 6

s_new$d <- sqrt(s_new$d^2 - alpha) # line 7

# save a little bit on computation and only check for
# convergence intermittently
if (!is.null(check_interval) && iter %% check_interval == 0) {
log_debug("Computing relative change in Frobenius norm.")
delta <- relative_f_norm_change(s_new, s)
}

s <- s_new

log_info(
glue(
"Iter {iter} complete. ",
"delta = {if (!is.null(check_interval)) delta else Inf}, ",
"alpha = {alpha}"
)
)

assert_alpha_positive(alpha)

iter <- iter + 1

if (iter > max_iter) {
warning(
"\nReached maximum allowed iterations. Returning early.\n",
call. = FALSE
)
break
}

}

adaptive_imputation(
u = s$u,
d = s$d,
v = s$v,
alpha = alpha,
...
)

}
Loading

0 comments on commit d92eeb9

Please sign in to comment.