From 844de4e2e6a32fb4b51ed4a7080b923874d5f3b7 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 27 Jul 2024 15:39:48 -0700 Subject: [PATCH 01/26] Initial multi-threaded distance matrix calculation --- smallvis/DESCRIPTION | 3 + smallvis/NAMESPACE | 1 + smallvis/R/RcppExports.R | 11 ++ smallvis/R/cost.R | 200 ++++++++++++++++------------- smallvis/R/smallvis-package.R | 4 + smallvis/R/smallvis.R | 70 +++++++--- smallvis/R/sne.R | 174 ++++++++++++++----------- smallvis/man/smallvis.Rd | 9 +- smallvis/src/.gitignore | 3 + smallvis/src/RcppExports.cpp | 47 +++++++ smallvis/src/matrix.cpp | 116 +++++++++++++++++ smallvis/tests/testthat/test_api.R | 11 ++ 12 files changed, 460 insertions(+), 189 deletions(-) create mode 100644 smallvis/R/RcppExports.R create mode 100644 smallvis/R/smallvis-package.R create mode 100644 smallvis/src/.gitignore create mode 100644 smallvis/src/RcppExports.cpp create mode 100644 smallvis/src/matrix.cpp diff --git a/smallvis/DESCRIPTION b/smallvis/DESCRIPTION index 791e2e7..cb379a1 100644 --- a/smallvis/DESCRIPTION +++ b/smallvis/DESCRIPTION @@ -11,6 +11,7 @@ License: GPL (>= 2) Imports: methods, mize (>= 0.2), + Rcpp, Rfast, rnndescent, vizier @@ -24,3 +25,5 @@ Date: 2017-11-28 URL: http://github.com/jlmelville/smallvis BugReports: http://github.com/jlmelville/smallvis/issues RoxygenNote: 7.3.2 +LinkingTo: + Rcpp diff --git a/smallvis/NAMESPACE b/smallvis/NAMESPACE index f75ad9c..f44dae6 100644 --- a/smallvis/NAMESPACE +++ b/smallvis/NAMESPACE @@ -3,3 +3,4 @@ export(smallvis) export(smallvis_perpstep) export(smallvis_rep) +useDynLib(smallvis, .registration = TRUE) diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R new file mode 100644 index 0000000..923ffe2 --- /dev/null +++ b/smallvis/R/RcppExports.R @@ -0,0 +1,11 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +dist2_cpp <- function(input, n_threads = 1L) { + .Call(`_smallvis_dist2_cpp`, input, n_threads) +} + +dist_cpp <- function(input, n_threads = 1L) { + .Call(`_smallvis_dist_cpp`, input, n_threads) +} + diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index 58d3874..c00e1ab 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -129,21 +129,21 @@ stop_exaggerating <- function(cost, exaggeration_factor) { largevis <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", gamma = 1, gr_eps = 0.1, normalize = TRUE, eps = 1e-9, row_weight = NULL, - n_threads = 0) { + use_cpp = FALSE, n_threads = 0) { if (!is.null(row_weight)) { row_normalize <- row_weight } else { row_normalize <- TRUE } - lreplace(tsne(perplexity), + lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X = X, perplexity = perplexity, symmetrize = symmetrize, kernel = inp_kernel, normalize = normalize, verbose = verbose, row_normalize = row_normalize, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost$greps1 <- gr_eps - 1 cost @@ -181,20 +181,21 @@ largevis <- function(perplexity, inp_kernel = "gaussian", umap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", spread = 1, min_dist = 0.001, gr_eps = 0.1, eps = 1e-9, - row_weight = NULL, n_threads = 0) { + row_weight = NULL, n_threads = 0, use_cpp = FALSE) { if (!is.null(row_weight)) { row_normalize <- row_weight } else 
{ row_normalize <- FALSE } - lreplace(tsne(perplexity), + lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = FALSE, row_normalize = row_normalize, n_threads = n_threads, - verbose = verbose, ret_extra = ret_extra) + verbose = verbose, ret_extra = ret_extra, + use_cpp = use_cpp) cost <- init_ab(cost, spread = spread, min_dist = min_dist, verbose = verbose) @@ -240,14 +241,15 @@ umap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", # UMAP with the output kernel fixed to the t-distribution tumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", - gr_eps = 0.1, eps = 1e-9, row_weight = NULL, n_threads = 0) { + gr_eps = 0.1, eps = 1e-9, row_weight = NULL, n_threads = 0, + use_cpp = FALSE) { if (!is.null(row_weight)) { row_normalize <- row_weight } else { row_normalize <- FALSE } - lreplace(umap(perplexity), + lreplace(umap(perplexity, n_threads = n_threads, use_cpp = use_cpp), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost$eps <- eps @@ -255,7 +257,8 @@ tumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = FALSE, row_normalize = row_normalize, n_threads = n_threads, - verbose = verbose, ret_extra = ret_extra) + verbose = verbose, ret_extra = ret_extra, + use_cpp = use_cpp) cost }, gr = function(cost, Y) { @@ -277,15 +280,15 @@ tumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", # t-UMAP where output and input affinities are normalized ntumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", - gr_eps = 0.1, eps = 1e-9, n_threads = 0) { - lreplace(tumap(perplexity), + gr_eps = 0.1, eps = 1e-9, n_threads = 0, use_cpp = FALSE) { + lreplace(tumap(perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost$eps <- eps cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, n_threads = n_threads, verbose = verbose, - ret_extra = ret_extra) + ret_extra = ret_extra, use_cpp = use_cpp) cost }, pfn = function(cost, Y) { @@ -329,15 +332,15 @@ ntumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", # Reverse KL divergence rklsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -379,15 +382,16 @@ rklsne <- function(perplexity, inp_kernel = "gaussian", # Jensen-Shannon divergence jssne <- function(perplexity, inp_kernel = "gaussian", 
symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel), + tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -439,15 +443,16 @@ jssne <- function(perplexity, inp_kernel = "gaussian", # Chi-squared divergence chsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel), + tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -499,15 +504,16 @@ chsne <- function(perplexity, inp_kernel = "gaussian", # Hellinger distance divergence hlsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel), + tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -590,7 +596,7 @@ absne <- function(perplexity, inp_kernel = "gaussian", lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 } lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = TRUE, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { @@ -600,7 +606,7 @@ absne <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = TRUE) cost$inva4 <- 4 / alpha cost$minvab <- -1 / (alpha * beta) cost$inval <- 1 / (alpha * lambda) @@ -653,9 +659,9 @@ absne <- function(perplexity, inp_kernel = "gaussian", # alpha != 0, beta = 0 => lambda = alpha absneb0 <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", alpha = 1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, 
n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { @@ -665,7 +671,7 @@ absneb0 <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps @@ -723,9 +729,10 @@ absneb0 <- function(perplexity, inp_kernel = "gaussian", # alpha = -beta != 0 => lambda = 0 absneamb <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", alpha = 1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { @@ -735,7 +742,7 @@ absneamb <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$N <- nrow(cost$P) cost$N2 <- (cost$N - 1) * cost$N @@ -792,9 +799,10 @@ absneamb <- function(perplexity, inp_kernel = "gaussian", # alpha = 0, beta != 0 => lambda = beta absnea0 <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", beta = 1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { @@ -803,7 +811,7 @@ absnea0 <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps @@ -865,9 +873,9 @@ absnea0 <- function(perplexity, inp_kernel = "gaussian", # alpha = 0, beta = 0 => lambda = 0 absne00 <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { @@ -876,7 +884,7 @@ absne00 <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -926,7 +934,7 @@ absne00 <- function(perplexity, inp_kernel = "gaussian", # alpha-beta divergence abssne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", alpha = 1, lambda = 1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, 
use_cpp = FALSE) { beta <- lambda - alpha eps0 <- 1e-5 @@ -941,9 +949,8 @@ abssne <- function(perplexity, inp_kernel = "gaussian", lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 } lreplace( - tsne(perplexity = perplexity), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c(), - n_threads = n_threads) { + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) if (verbose) { tsmessage("Using ABSSNE with alpha = ", formatC(alpha), @@ -952,7 +959,7 @@ abssne <- function(perplexity, inp_kernel = "gaussian", cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$inva4 <- 4 / alpha cost$minvab <- -1 / (alpha * beta) cost$inval <- 1 / (alpha * lambda) @@ -1004,15 +1011,15 @@ abssne <- function(perplexity, inp_kernel = "gaussian", # global-SNE gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + use_cpp = FALSE, n_threads = 0) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps # Phat isn't affected by early exaggeration, so we cache it once only @@ -1020,7 +1027,7 @@ gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian", Phat <- as.matrix(X) } else { - Phat <- safe_dist2(X) + Phat <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) } Phat <- Phat + 1 @@ -1079,23 +1086,25 @@ gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian", # Distance Preserving Methods --------------------------------------------- mmds_init <- function(cost, X, max_iter, eps = .Machine$double.eps, - verbose = FALSE, ret_extra = c()) { + verbose = FALSE, ret_extra = c(), use_cpp = FALSE, + n_threads = 1) { tsmessage("Calculating pairwise distances") if (methods::is(X, "dist")) { cost$R <- as.matrix(X) } else { - cost$R <- sqrt(safe_dist2(X)) + cost$R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) } cost$eps <- eps cost } # Metric MDS, minimizing strain. 
-mmds <- function(eps = .Machine$double.eps) { +mmds <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { list( init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra) + cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, + use_cpp = use_cpp, n_threads = n_threads) cost }, pfn = function(cost, Y) { @@ -1109,7 +1118,7 @@ mmds <- function(eps = .Machine$double.eps) { cost }, update = function(cost, Y) { - cost$D <- sqrt(safe_dist2(Y)) + cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) cost }, sentinel = "D", @@ -1128,12 +1137,13 @@ mmds <- function(eps = .Machine$double.eps) { ) } -smmds <- function(eps = .Machine$double.eps) { +smmds <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { lreplace( - mmds(), + mmds(use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, eps = .Machine$double.eps, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra) + cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, + use_cpp = use_cpp, n_threads = n_threads) cost$R2 <- cost$R * cost$R cost$R <- NULL cost @@ -1165,17 +1175,18 @@ smmds <- function(eps = .Machine$double.eps) { res }, update = function(cost, Y) { - cost$D2 <- dist2(Y) + cost$D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) cost }, sentinel = "D2" ) } -sammon <- function(eps = .Machine$double.eps) { - lreplace(mmds(), +sammon <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { + lreplace(mmds(eps = eps, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra) + cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, + use_cpp = use_cpp, n_threads = n_threads) cost$rsum_inv <- 1 / sum(cost$R) cost }, @@ -1193,12 +1204,13 @@ sammon <- function(eps = .Machine$double.eps) { } -gmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) { +gmmds <- function(k, eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 0) { lreplace( - mmds(), + mmds(use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost$R <- geodesic(X, k, n_threads = n_threads, verbose = verbose) + cost$R <- geodesic(X, k, n_threads = n_threads, use_cpp = use_cpp, + verbose = verbose) cost$eps <- eps cost }, @@ -1220,13 +1232,14 @@ gmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) { # Define neighborhoods using a radius based on a fraction (f) of all input # distances (sorted by increasing length), don't correct non-neighborhood # distances unless they smaller than the input distance -ballmmds <- function(f = 0.1, eps = .Machine$double.eps) { +ballmmds <- function(f = 0.1, eps = .Machine$double.eps, use_cpp = FALSE, + n_threads = 1) { lreplace( - mmds(), + mmds(use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost = cost, X = X, max_iter = max_iter, eps = eps, verbose = verbose, - ret_extra = ret_extra) - + cost <- mmds_init(cost = cost, X = X, max_iter = max_iter, eps = eps, + verbose = verbose, ret_extra = ret_extra, + use_cpp = use_cpp, n_threads = n_threads) rs <- cost$R[upper.tri(cost$R)] rmax <- Rfast::nth(rs, max(1, round(f * length(rs)))) if (verbose) { @@ -1274,7 +1287,7 @@ ballmmds <- function(f = 0.1, eps = .Machine$double.eps) { res }, 
    update = function(cost, Y) {
-      cost$D <- sqrt(safe_dist2(Y))
+      cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads)
       cost
     }
   )
 }
@@ -1282,12 +1295,14 @@
 # Create the symmetrized knn graph, don't correct non-neighborhood distances
 # unless they smaller than the input distance
-knnmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) {
+knnmmds <- function(k, eps = .Machine$double.eps, use_cpp = FALSE,
+                    n_threads = 0) {
   lreplace(
-    mmds(),
+    mmds(use_cpp = use_cpp, n_threads = n_threads),
     init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
       cost <- mmds_init(cost = cost, X = X, max_iter = max_iter, eps = eps,
-                        verbose = verbose, ret_extra = ret_extra)
+                        verbose = verbose, ret_extra = ret_extra,
+                        use_cpp = use_cpp, n_threads = n_threads)
       knn <- knn_graph(X = X, k = k, n_threads = n_threads, verbose = verbose)
       # symmetrize
       cost$knn <- pmax(knn, t(knn))
@@ -1310,6 +1325,7 @@ knnmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) {
       eps <- cost$eps
       R <- cost$R
       D <- cost$D
+
       K <- -4 * (R - D) / (D + eps)
       knn <- cost$knn
@@ -1332,7 +1348,7 @@ knnmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) {
       res
     },
     update = function(cost, Y) {
-      cost$D <- sqrt(safe_dist2(Y))
+      cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads)
       cost
     }
   )
@@ -1349,7 +1365,7 @@ knnmmds <- function(k, eps = .Machine$double.eps, n_threads = 0) {
 # squared input distances. Otherwise, no weighting is applied.
 ee <- function(perplexity, lambda = 100, neg_weights = TRUE,
                inp_kernel = "gaussian", symmetrize = "symmetric",
-               eps = .Machine$double.eps, n_threads = 0) {
+               eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 0) {
   list(
     init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
       symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
@@ -1358,7 +1374,7 @@ ee <- function(perplexity, lambda = 100, neg_weights = TRUE,
         R <- X
       }
       else {
-        R <- sqrt(safe_dist2(X))
+        R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads)
       }
       cost$Vn <- R / sum(R)
     }
@@ -1368,7 +1384,7 @@ ee <- function(perplexity, lambda = 100, neg_weights = TRUE,
       cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel,
                        symmetrize = symmetrize, normalize = TRUE,
                        verbose = verbose, ret_extra = ret_extra,
-                       n_threads = n_threads)
+                       n_threads = n_threads, use_cpp = use_cpp)
       cost$eps <- eps
       cost
     },
@@ -1423,17 +1439,17 @@ ee <- function(perplexity, lambda = 100, neg_weights = TRUE,
 # default lambda = 0.9 from "Majorization-Minimization for Manifold Embedding"
 # Yang, Peltonen, Kaski 2015
 nerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
-                 eps = .Machine$double.eps, n_threads = 0) {
+                 eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) {
   lambda2 <- 2 * lambda
   oml <- 1 - lambda
   oml2 <- 2 * oml
   lreplace(
-    tsne(perplexity = perplexity),
+    tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads),
     init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
       cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel,
                        symmetrize = "none", normalize = FALSE,
                        verbose = verbose, ret_extra = ret_extra,
-                       n_threads = n_threads)
+                       n_threads = n_threads, use_cpp = use_cpp)
       cost$eps <- eps
       cost
     },
@@ -1546,7 +1562,7 @@ snerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
 # kappa = 0 behaves like ASNE
 # kappa = 1 behaves like NeRV with lambda = 0. Yes that's confusing.
jse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { eps0 <- 1e-5 kappa <- max(kappa, eps0) kappa <- min(kappa, 1 - eps0) @@ -1557,12 +1573,12 @@ jse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian", om_kappa_inv <- 1 / om_kappa lreplace( - ssne(perplexity = perplexity), + ssne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -1666,14 +1682,15 @@ sjse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian", rsrnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps, - n_threads = 0) { - lreplace(nerv(perplexity = perplexity, lambda = lambda), + n_threads = 0, use_cpp = FALSE) { + lreplace(nerv(perplexity = perplexity, lambda = lambda, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "symmetric", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) P <- cost$P P <- P / rowSums(P) cost$P <- P @@ -1684,14 +1701,16 @@ rsrnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps, ) } -rsrjse <- function(perplexity, kappa = 0.5, eps = .Machine$double.eps, n_threads = 0) { - lreplace(jse(perplexity = perplexity, kappa = kappa), +rsrjse <- function(perplexity, kappa = 0.5, eps = .Machine$double.eps, + n_threads = 0, use_cpp = FALSE) { + lreplace(jse(perplexity = perplexity, kappa = kappa, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "symmetric", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) P <- cost$P P <- P / rowSums(P) cost$P <- P @@ -1705,19 +1724,20 @@ rsrjse <- function(perplexity, kappa = 0.5, eps = .Machine$double.eps, n_threads # NeRV with input bandwidths transferred to the output kernel, as in the # original paper. 
bnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lambda2 <- 2 * lambda oml <- 1 - lambda oml2 <- 2 * oml lreplace( - nerv(perplexity = perplexity, lambda = lambda), + nerv(perplexity = perplexity, lambda = lambda, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra <- unique(c(ret_extra, 'beta')) cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost$lambda2b <- lambda2 * cost$beta cost$oml2b <- oml2 * cost$beta diff --git a/smallvis/R/smallvis-package.R b/smallvis/R/smallvis-package.R new file mode 100644 index 0000000..37a033d --- /dev/null +++ b/smallvis/R/smallvis-package.R @@ -0,0 +1,4 @@ +## usethis namespace: start +#' @useDynLib smallvis, .registration = TRUE +## usethis namespace: end +NULL \ No newline at end of file diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 0ac0a98..55b2665 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -524,7 +524,12 @@ #' to those value which are returned when this value is \code{TRUE}. See the #' \code{Value} section for details. #' @param n_threads Number of threads to use in multi-threaded code. Default is -#' 0, which means no multi-threading. +#' 0, which means no multi-threading. Mainly affects the calculation of things +#' like distance matrices if you set \code{use_cpp = TRUE}. Otherwise, only +#' methods that need to calculate nearest neighbors will be affected. +#' @param use_cpp If \code{TRUE} use multi-threaded C++ code to calculate some +#' matrices. Default is \code{FALSE}. This won't speed up all steps and you +#' will want to use this in conjunction with \code{n_threads}. #' @param eps Set epsilon for avoiding division-by-zero errors. 
Default is #' \code{.Machine$double.eps}, but if you see inconsistent convergence results #' with optimizer that should be reducing the cost each iteration, then try @@ -855,6 +860,7 @@ smallvis <- function(X, k = 2, scale = "absmax", tol_wait = 15, ret_extra = FALSE, n_threads = 0, + use_cpp = FALSE, eps = .Machine$double.eps, verbose = TRUE) { @@ -892,20 +898,20 @@ smallvis <- function(X, k = 2, scale = "absmax", largevis = largevis(perplexity = perplexity, n_threads = n_threads, eps = eps), tumap = tumap(perplexity = perplexity, n_threads = n_threads, eps = eps), ntumap = ntumap(perplexity = perplexity, n_threads = n_threads, eps = eps), - mmds = mmds(eps = eps), - gmmds = gmmds(k = perplexity, n_threads = n_threads, eps = eps), + mmds = mmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), + gmmds = gmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), asne = asne(perplexity = perplexity, n_threads = n_threads, eps = eps), ssne = ssne(perplexity = perplexity, n_threads = n_threads, eps = eps), wtsne = wtsne(perplexity = perplexity, n_threads = n_threads, eps = eps), wssne = wssne(perplexity = perplexity, n_threads = n_threads, eps = eps), hssne = hssne(perplexity = perplexity, n_threads = n_threads, eps = eps), - ee = ee(perplexity = perplexity, n_threads = n_threads, eps = eps), + ee = ee(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), nerv = nerv(perplexity = perplexity, n_threads = n_threads, eps = eps), snerv = snerv(perplexity = perplexity, n_threads = n_threads, eps = eps), jse = jse(perplexity = perplexity, n_threads = n_threads, eps = eps), sjse = sjse(perplexity = perplexity, n_threads = n_threads, eps = eps), - smmds = smmds(eps = eps), - sammon = sammon(eps = eps), + smmds = smmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), + sammon = sammon(n_threads = n_threads, eps = eps, use_cpp = use_cpp), tasne = tasne(perplexity = perplexity, n_threads = n_threads, eps = eps), trmsne = trmsne(perplexity = perplexity, n_threads = n_threads, eps = eps), trsrsne = trsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps), @@ -918,8 +924,8 @@ smallvis <- function(X, k = 2, scale = "absmax", basne = basne(perplexity = perplexity, n_threads = n_threads, eps = eps), btasne = btasne(perplexity = perplexity, n_threads = n_threads, eps = eps), bnerv = bnerv(perplexity = perplexity, n_threads = n_threads, eps = eps), - ballmmds = ballmmds(eps = eps), - knnmmds = knnmmds(k = perplexity, n_threads = n_threads, eps = eps), + ballmmds = ballmmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), + knnmmds = knnmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), dhssne = dhssne(perplexity = perplexity, n_threads = n_threads, eps = eps), tsneu = tsneu(perplexity = perplexity, n_threads = n_threads, eps = eps), pstsne = pstsne(perplexity = perplexity, n_threads = n_threads, eps = eps), @@ -930,7 +936,7 @@ smallvis <- function(X, k = 2, scale = "absmax", absne = absne(perplexity = perplexity, n_threads = n_threads, eps = eps), chsne = chsne(perplexity = perplexity, n_threads = n_threads, eps = eps), hlsne = hlsne(perplexity = perplexity, n_threads = n_threads, eps = eps), - gsne = gsne(perplexity = perplexity, n_threads = n_threads, eps = eps), + gsne = gsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), rklsne = rklsne(perplexity = perplexity, n_threads = n_threads, eps = eps), jssne = jssne(perplexity = perplexity, n_threads = n_threads, eps = eps), abssne = 
abssne(perplexity = perplexity, n_threads = n_threads, eps = eps), @@ -1153,7 +1159,8 @@ smallvis <- function(X, k = 2, scale = "absmax", cost_fn = cost_fn, itercosts = itercosts, start_time = start_time, optionals = ret_optionals, pca = ifelse(pca && !whiten, initial_dims, 0), - whiten = ifelse(pca && whiten, initial_dims, 0))) + whiten = ifelse(pca && whiten, initial_dims, 0), + use_cpp = use_cpp, n_threads = n_threads)) } opt_stages <- c() @@ -1387,7 +1394,8 @@ smallvis <- function(X, k = 2, scale = "absmax", exaggeration_factor, late_exaggeration_factor, optionals = ret_optionals, pca = ifelse(pca && !whiten, initial_dims, 0), - whiten = ifelse(pca && whiten, initial_dims, 0)) + whiten = ifelse(pca && whiten, initial_dims, 0), + use_cpp = use_cpp, n_threads = n_threads) res } @@ -1883,13 +1891,13 @@ make_smallvis_cb <- function(df) { palette <- NULL function(Y, iter, cost = NULL) { if (is.null(palette)) { - palette <- vizier:::make_palette(ncolors = nrow(Y), color_scheme = rainbow) + palette <- vizier:::color_helper(df, color_scheme = rainbow)$palette } title <- paste0("iter: ", iter) if (!(is.null(cost) || is.na(cost))) { title <- paste0(title, " cost = ", formatC(cost)) } - vizier::embed_plot(Y, df, title = title, colors = palette) + vizier::embed_plot(Y, df, title = title, color_scheme = palette) } } @@ -1909,7 +1917,7 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = stop_lying_iter = NULL, start_late_lying_iter = NULL, opt_input = NULL, opt_res = NULL, exaggeration_factor = 1, late_exaggeration_factor = 1, - optionals = c()) { + optionals = c(), use_cpp = FALSE, n_threads = 1) { attr(Y, "dimnames") <- NULL if (ret_extra) { end_time <- Sys.time() @@ -2015,11 +2023,11 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = res$DX <- X } else { - res$DX <- sqrt(safe_dist2(X)) + res$DX <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) } } else if (o == "dy") { - res$DY <- sqrt(safe_dist2(Y)) + res$DY <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) } if (o == "x") { @@ -2258,7 +2266,8 @@ shannon <- function(D2, beta) { ) } -x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE) { +x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE, use_cpp = FALSE, + n_threads = 1) { x_is_dist <- methods::is(X, "dist") if (x_is_dist) { D <- X @@ -2267,7 +2276,7 @@ x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE) { D <- D * D } else { - D <- safe_dist2(X) + D <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) } beta <- 1 / (sigma * sigma) sres <- shannon(D, beta) @@ -2360,11 +2369,12 @@ knn_graph <- function(X, k, n_threads, verbose) { # Given data X and k nearest neighbors, return a geodisic distance matrix # Disconnections are treated by using the Euclidean distance. 
-geodesic <- function(X, k, fill = TRUE, n_threads = 0, verbose = FALSE) { +geodesic <- function(X, k, fill = TRUE, use_cpp = FALSE, n_threads = 0, + verbose = FALSE) { tsmessage("Calculating geodesic distances with k = ", k) - # The hard work is done by Rfast's implementation of Floyd's algorithm R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose) + # The hard work is done by Rfast's implementation of Floyd's algorithm G <- Rfast::floyd(R) if (any(is.infinite(G)) && fill) { tsmessage("k = ", k, " resulted in disconnections: filling with Euclidean distances") @@ -2372,7 +2382,7 @@ geodesic <- function(X, k, fill = TRUE, n_threads = 0, verbose = FALSE) { R <- as.matrix(X) } else { - R <- sqrt(safe_dist2(X)) + R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) } G[is.infinite(G)] <- R[is.infinite(G)] } @@ -2529,6 +2539,24 @@ dist2 <- function(X) { D2 + sweep(X %*% t(X) * -2, 2, t(D2), `+`) } +calc_d2 <- function(X, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + dist2_cpp(X, n_threads = n_threads) + } + else { + safe_dist2(X) + } +} + +calc_d <- function(X, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + dist_cpp(X, n_threads = n_threads) + } + else { + sqrt(safe_dist2(X)) + } +} + # Squared Euclidean distances, ensuring no small -ve distances can occur safe_dist2 <- function(X) { D2 <- dist2(X) diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R index 882bff3..741aeb4 100644 --- a/smallvis/R/sne.R +++ b/smallvis/R/sne.R @@ -83,7 +83,7 @@ kl_cost <- function(cost, Y) { # t-SNE tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", normalize = TRUE, row_normalize = TRUE, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { list( init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { @@ -92,7 +92,7 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", symmetrize = symmetrize, normalize = normalize, row_normalize = row_normalize, verbose = verbose, ret_extra = ret_extra, - n_threads = 0) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -147,10 +147,11 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", # In \emph{International Conference on Artificial Intelligence and Statistics} (pp. 67-74). ssne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( tsne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads), + symmetrize = symmetrize, eps = eps, n_threads = n_threads, + use_cpp = use_cpp), pfn = kl_costQ, gr = function(cost, Y) { cost <- cost$update(cost, Y) @@ -171,14 +172,14 @@ ssne <- function(perplexity, inp_kernel = "gaussian", # Stochastic neighbor embedding. # In \emph{Advances in neural information processing systems} (pp. 833-840). 
asne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { - lreplace(tsne(perplexity), + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -234,11 +235,11 @@ hssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", # exists to demonstrate that constant beta doesn't have any meaningful effect # on the results. bhssne <- function(perplexity, alpha = 0.5, beta = 1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { alpha <- max(alpha, 1e-8) beta <- max(beta, 1e-8) lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), b4 = 4 * beta, ab = alpha * beta, apow = -1 / alpha, @@ -247,7 +248,7 @@ bhssne <- function(perplexity, alpha = 0.5, beta = 1, cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -340,15 +341,16 @@ dhssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", # (pp. 460-468). wtsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { - lreplace(tsne(perplexity = perplexity), + n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity = perplexity, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) ret_extra <- c(ret_extra, "pdeg") - cost <- sne_init(cost, X, perplexity = perplexity, - kernel = inp_kernel, symmetrize = symmetrize, - normalize = TRUE, verbose = verbose, - ret_extra = ret_extra, n_threads = n_threads) + cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, + symmetrize = symmetrize, normalize = TRUE, + verbose = verbose, ret_extra = ret_extra, + n_threads = n_threads, use_cpp = use_cpp) # P matrix degree centrality: column sums deg <- cost$pdeg if (verbose) { @@ -381,15 +383,17 @@ wtsne <- function(perplexity, inp_kernel = "gaussian", wssne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { - lreplace(ssne(perplexity = perplexity), + n_threads = 0, use_cpp = FALSE) { + lreplace(ssne(perplexity = perplexity, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) ret_extra <- c(ret_extra, "pdeg") cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, - ret_extra = ret_extra, n_threads = n_threads) + ret_extra = ret_extra, n_threads = n_threads, + use_cpp = use_cpp) # P matrix degree centrality: column sums deg <- cost$pdeg if (verbose) { @@ -416,15 +420,16 @@ wssne <- function(perplexity, inp_kernel = "gaussian", # t-SNE but 
with the gradient defined in terms of un-normalized weights # Exists entirely as an academic exercise tsneu <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel), + tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra = unique(c(ret_extra, "V")) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps @@ -447,15 +452,16 @@ tsneu <- function(perplexity, inp_kernel = "gaussian", # A pseudo-separable approximation of t-SNE, where the output weight sum is only # recalculated during the epoch pstsne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel), + tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra = unique(c(ret_extra, "V")) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) # need to row-normalize and symmetrize affinities cost$V <- cost$V / rowSums(cost$V) @@ -503,16 +509,17 @@ pstsne <- function(perplexity, inp_kernel = "gaussian", # EE-like cost function in terms of I-Divergence # Scaled to give a gradient similar in form to t-SNE tee <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", - lambda = 0.01, eps = .Machine$double.eps, n_threads = 0) { + lambda = 0.01, eps = .Machine$double.eps, n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) ret_extra = unique(c(ret_extra, "V", "dint")) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) V <- cost$P cost$eps <- eps cost$invN <- 1 / sum(V) @@ -564,15 +571,15 @@ skdtsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0) { # Use the UMAP curve family in output kernel usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", spread = 1, min_dist = 0.001, gr_eps = 0.1, - eps = .Machine$double.eps, n_threads = 0) { + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity), + tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = symmetrize, normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = 
n_threads, use_cpp = use_cpp) cost <- init_ab(cost, spread = spread, min_dist = min_dist, verbose = verbose) cost$eps <- eps cost @@ -600,14 +607,14 @@ usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", # UMAP cross entropy cost instead of KL divergence cetsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0) { - lreplace(tsne(perplexity), + n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost @@ -660,14 +667,15 @@ cetsne <- function(perplexity, inp_kernel = "gaussian", # t-SNE with input kernel bandwidths transferred to output btsne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, - eps = .Machine$double.eps, n_threads = 0) { - lreplace(tsne(perplexity = perplexity, inp_kernel = inp_kernel), + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity = perplexity, inp_kernel = inp_kernel, + use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra <- unique(c(ret_extra, 'beta')) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -694,14 +702,15 @@ btsne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, # SSNE with input kernel bandwidths transferred to output bssne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, - eps = .Machine$double.eps, n_threads = 0) { - lreplace(ssne(perplexity = perplexity, inp_kernel = inp_kernel), + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace(ssne(perplexity = perplexity, inp_kernel = inp_kernel, + use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra <- unique(c(ret_extra, 'beta')) cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "symmetric", normalize = TRUE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) if (!is.null(beta)) { cost$beta <- beta } @@ -724,16 +733,16 @@ bssne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, # ASNE with input kernel bandwidths transferred to output basne <- function(perplexity, beta = NULL, eps = .Machine$double.eps, - n_threads = 0) { + n_threads = 0, use_cpp = FALSE) { lreplace( - asne(perplexity = perplexity), + asne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { ret_extra <- unique(c(ret_extra, 'beta')) cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -779,14 +788,15 @@ btasne <- 
function(perplexity, beta = NULL, eps = .Machine$double.eps, ) } -tasne <- function(perplexity, n_threads = 0) { - lreplace(tsne(perplexity = perplexity), +tasne <- function(perplexity, n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity = perplexity, use_cpp = use_cpp, + n_threads = n_threads), init = function(cost, X, max_iter, eps = .Machine$double.eps, verbose = FALSE, ret_extra = c()) { cost <- sne_init(cost, X, perplexity = perplexity, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -814,13 +824,14 @@ tasne <- function(perplexity, n_threads = 0) { # ASNE but with the t-distributed kernel tasne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { - lreplace(tsne(perplexity = perplexity), + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace(tsne(perplexity = perplexity, n_threads = n_threads, + use_cpp = use_cpp), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, inp_kernel <- inp_kernel, + cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, symmetrize = "none", normalize = FALSE, verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) + n_threads = n_threads, use_cpp = use_cpp) cost$eps <- eps cost }, @@ -846,23 +857,26 @@ tasne <- function(perplexity, inp_kernel = "gaussian", # t-RM-SNE # t-SNE without symmetrization of P (but still pair-normalizing) # row-normalize, then matrix normalize -trmsne <- function(perplexity, inp_kernel = "gaussian", eps = .Machine$double.eps, n_threads = 0) { - lreplace(tsne(perplexity = perplexity, inp_kernel = inp_kernel), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads) - cost$eps <- eps - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - W <- cost$W - cost$G <- k2g(Y, 2 * W * (cost$P - W / cost$Z), symmetrize = TRUE) - cost - } +trmsne <- function(perplexity, inp_kernel = "gaussian", + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace( + tsne(perplexity = perplexity, inp_kernel = inp_kernel, + n_threads = n_threads, use_cpp = use_cpp), + init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { + cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, + symmetrize = "none", normalize = TRUE, + verbose = verbose, ret_extra = ret_extra, + n_threads = n_threads, use_cpp = use_cpp) + cost$eps <- eps + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + + W <- cost$W + cost$G <- k2g(Y, 2 * W * (cost$P - W / cost$Z), symmetrize = TRUE) + cost + } ) } @@ -870,14 +884,16 @@ trmsne <- function(perplexity, inp_kernel = "gaussian", eps = .Machine$double.ep # t-SNE but without row-normalizing or symmetrizing, just matrix normalization # Not recommended tmsne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0) { - lreplace(trmsne(perplexity = perplexity, inp_kernel = inp_kernel), + eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { + lreplace(trmsne(perplexity = perplexity, inp_kernel = inp_kernel, + eps = eps, n_threads = n_threads, use_cpp = use_cpp), init = function(cost, X, max_iter, verbose = FALSE, ret_extra = 
c()) {
       cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel,
                        symmetrize = "none", row_normalize = FALSE,
                        normalize = TRUE, verbose = verbose,
-                       ret_extra = ret_extra, n_threads = n_threads)
+                       ret_extra = ret_extra, n_threads = n_threads,
+                       use_cpp = use_cpp)
       cost$eps <- eps
       cost
     }
   )
 }
@@ -886,14 +902,15 @@
 # RSR row-normalize, symmetrize, then row-normalize again
 # Might work a tiny bit better than t-ASNE?
-trsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0) {
-  lreplace(tasne(perplexity),
+trsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0,
+                    use_cpp = FALSE) {
+  lreplace(tasne(perplexity, use_cpp = use_cpp, n_threads = n_threads),
     init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
       cost <- sne_init(cost, X, perplexity = perplexity,
                        symmetrize = "symmetric", normalize = FALSE,
                        verbose = verbose, ret_extra = ret_extra,
-                       n_threads = n_threads)
+                       n_threads = n_threads, use_cpp = use_cpp)
       P <- cost$P
       P <- P / rowSums(P)
       cost$P <- P
@@ -905,14 +922,15 @@
-arsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0) {
-  lreplace(asne(perplexity),
+arsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0,
+                    use_cpp = FALSE) {
+  lreplace(asne(perplexity, use_cpp = use_cpp, n_threads = n_threads),
     init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
       cost <- sne_init(cost, X, perplexity = perplexity,
                        symmetrize = "symmetric", normalize = FALSE,
                        verbose = verbose, ret_extra = ret_extra,
-                       n_threads = n_threads)
+                       n_threads = n_threads, use_cpp = use_cpp)
       P <- cost$P
       P <- P / rowSums(P)
       cost$P <- P
@@ -970,6 +988,7 @@ sne_init <- function(cost,
                      row_normalize = TRUE,
                      normalize = TRUE,
                      n_threads = 0,
+                     use_cpp = FALSE,
                      verbose = FALSE,
                      ret_extra = c()) {
@@ -1017,7 +1036,8 @@
   } else if (tolower(kernel) == "sigma") {
     tsmessage("Using fixed sigma = ", formatC(perplexity))
-    x2ares <- x2aff_sigma(X, sigma = perplexity, verbose = verbose)
+    x2ares <- x2aff_sigma(X, sigma = perplexity, n_threads = n_threads,
+                          use_cpp = use_cpp, verbose = verbose)
     P <- x2ares$W
   } else {
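All of the sne.R constructors above funnel use_cpp and n_threads through sne_init and into the calc_d()/calc_d2() helpers added in smallvis.R, so the compiled and pure-R paths should agree to rounding error. A quick interactive check once the package is rebuilt (a sketch only; dist2_cpp and safe_dist2 are internal functions, hence the ::: access):

    # Compare the pure-R squared-distance matrix with the threaded C++ one.
    X <- as.matrix(iris[, 1:4])
    d2_r   <- smallvis:::safe_dist2(X)
    d2_cpp <- smallvis:::dist2_cpp(X, n_threads = 2)
    max(abs(d2_r - d2_cpp))  # expect numerical noise only, ~1e-12 or smaller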
diff --git a/smallvis/man/smallvis.Rd b/smallvis/man/smallvis.Rd
index 7a4e383..b3afb4c 100644
--- a/smallvis/man/smallvis.Rd
+++ b/smallvis/man/smallvis.Rd
@@ -37,6 +37,7 @@ smallvis(
   tol_wait = 15,
   ret_extra = FALSE,
   n_threads = 0,
+  use_cpp = FALSE,
   eps = .Machine$double.eps,
   verbose = TRUE
 )
@@ -195,7 +196,13 @@ to those value which are returned when this value is \code{TRUE}. See the
 \code{Value} section for details.}
 
 \item{n_threads}{Number of threads to use in multi-threaded code. Default is
-0, which means no multi-threading.}
+0, which means no multi-threading. Mainly affects the calculation of things
+like distance matrices if you set \code{use_cpp = TRUE}. Otherwise, only
+methods that need to calculate nearest neighbors will be affected.}
+
+\item{use_cpp}{If \code{TRUE} use multi-threaded C++ code to calculate some
+matrices. Default is \code{FALSE}. This won't speed up all steps and you
+will want to use this in conjunction with \code{n_threads}.}
 
 \item{eps}{Set epsilon for avoiding division-by-zero errors. Default is
 \code{.Machine$double.eps}, but if you see inconsistent convergence results
diff --git a/smallvis/src/.gitignore b/smallvis/src/.gitignore
new file mode 100644
index 0000000..22034c4
--- /dev/null
+++ b/smallvis/src/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.so
+*.dll
diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp
new file mode 100644
index 0000000..8c05cba
--- /dev/null
+++ b/smallvis/src/RcppExports.cpp
@@ -0,0 +1,47 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
+Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
+#endif
+
+// dist2_cpp
+NumericMatrix dist2_cpp(NumericMatrix input, std::size_t n_threads);
+RcppExport SEXP _smallvis_dist2_cpp(SEXP inputSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type input(inputSEXP);
+    Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(dist2_cpp(input, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
+// dist_cpp
+NumericMatrix dist_cpp(NumericMatrix input, std::size_t n_threads);
+RcppExport SEXP _smallvis_dist_cpp(SEXP inputSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type input(inputSEXP);
+    Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(dist_cpp(input, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
+    {"_smallvis_dist_cpp", (DL_FUNC) &_smallvis_dist_cpp, 2},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_smallvis(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
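RcppExports.cpp above and R/RcppExports.R are both generated files: the CallEntries registration table is what the `.Call()` wrappers in R/RcppExports.R resolve against. A sketch of the standard Rcpp round trip after editing the hand-written matrix.cpp below (devtools is a convenience assumption; R CMD INSTALL from the package root works too):

    # Regenerate the glue code after changing src/matrix.cpp, then reinstall.
    Rcpp::compileAttributes()  # rewrites R/RcppExports.R and src/RcppExports.cpp
    devtools::install()        # or R CMD INSTALL smallvis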
+  std::size_t d = input.ncol();
+
+  std::vector<double> transposed_data(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      transposed_data[i * d + j] = input(i, j);
+    }
+  }
+
+  std::vector<double> dist_matrix(n * n, 0.0);
+
+  std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+
+  std::vector<std::thread> threads;
+  for (std::size_t t = 0; t < n_threads; ++t) {
+    std::size_t start_row = t * chunk_size;
+    std::size_t end_row = std::min(start_row + chunk_size, n);
+    threads.emplace_back(d2, std::cref(transposed_data), std::ref(dist_matrix),
+                         start_row, end_row, n, d);
+  }
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  NumericMatrix result(n, n);
+  std::copy(dist_matrix.begin(), dist_matrix.end(), result.begin());
+
+  return result;
+}
+
+// [[Rcpp::export]]
+NumericMatrix dist_cpp(NumericMatrix input, std::size_t n_threads = 1) {
+  if (n_threads == 0) {
+    n_threads = 1;
+  }
+  std::size_t n = input.nrow();
+  std::size_t d = input.ncol();
+
+  std::vector<double> transposed_data(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      transposed_data[i * d + j] = input(i, j);
+    }
+  }
+
+  std::vector<double> dist_matrix(n * n, 0.0);
+
+  std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+
+  std::vector<std::thread> threads;
+  for (std::size_t t = 0; t < n_threads; ++t) {
+    std::size_t start_row = t * chunk_size;
+    std::size_t end_row = std::min(start_row + chunk_size, n);
+    threads.emplace_back(dist, std::cref(transposed_data),
+                         std::ref(dist_matrix), start_row, end_row, n, d);
+  }
+
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  NumericMatrix result(n, n);
+  std::copy(dist_matrix.begin(), dist_matrix.end(), result.begin());
+
+  return result;
+}
diff --git a/smallvis/tests/testthat/test_api.R b/smallvis/tests/testthat/test_api.R
index 44ddc92..a1db4f1 100644
--- a/smallvis/tests/testthat/test_api.R
+++ b/smallvis/tests/testthat/test_api.R
@@ -168,6 +168,17 @@ test_that("smmds", {
   expect_equal(final_cost(res), 0.3038, tolerance = 1e-4)
 })
 
+test_that("smmds-cpp", {
+  res <- smallvis(iris10, Y_init = iris10_Y, method = "smmds", eta = 0.001,
+                  epoch_callback = NULL, verbose = FALSE, ret_extra = TRUE,
+                  use_cpp = TRUE)
+  expect_equal(res$Y, c2y(-0.4952, 0.3507, 0.3716, 0.5528, -0.502, -1.449, 0.1852, -0.2768,
+                          1.042, 0.2206, 0.1035, 0.4062, -0.1173, -0.07106, -0.1869, -0.02864,
+                          -0.459, 0.09674, -0.1043, 0.3608), tolerance = 1e-3)
+  expect_equal(final_cost(res), 0.3038, tolerance = 1e-4)
+})
+
+
 test_that("gmmds", {
   res <- smallvis(iris10, Y_init = iris10_Y, method = "gmmds", eta = 0.1,
                   perplexity = 3,

From 5b901dfa2eb4f70b187523446d33b91599ac5d15 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sat, 27 Jul 2024 17:13:15 -0700
Subject: [PATCH 02/26] more C++ dist/t-weight experiments

---
 smallvis/R/RcppExports.R     |  4 ++
 smallvis/R/cost.R            | 58 ++++++++++++-----------
 smallvis/R/smallvis.R        | 89 ++++++++++++++++++++----------------
 smallvis/R/sne.R             | 68 +++++++++++++++------------
 smallvis/src/RcppExports.cpp | 13 ++++++
 smallvis/src/matrix.cpp      | 37 +++++++++++++++
 6 files changed, 173 insertions(+), 96 deletions(-)

diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R
index 923ffe2..5efe2ae 100644
--- a/smallvis/R/RcppExports.R
+++ b/smallvis/R/RcppExports.R
@@ -9,3 +9,7 @@ dist_cpp <- function(input, n_threads = 1L) {
   .Call(`_smallvis_dist_cpp`, input, n_threads)
 }
 
+tweight_cpp <- function(dist_matrix, n_threads) {
+  .Call(`_smallvis_tweight_cpp`, dist_matrix, n_threads)
+}
+
diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R
index c00e1ab..ed58500 100644
--- a/smallvis/R/cost.R
+++ b/smallvis/R/cost.R
@@ -160,13 +160,12 @@ largevis <- function(perplexity, inp_kernel = "gaussian",
     },
     gr = function(cost, Y) {
       cost <- cost_update(cost, Y)
-      W <- cost$W
       cost$G <- k2g(Y, 4 * W * (cost$P - ((gamma * W) / (1 + cost$greps1 * W))))
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
@@ -225,7 +224,7 @@ umap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap",
       cost
     },
     update = function(cost, Y) {
-      D2 <- dist2(Y)
+      D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       D2[D2 < 0] <- 0
 
       W <- 1 / (1 + cost$a * D2 ^ cost$b)
@@ -268,7 +267,7 @@ tumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
@@ -308,7 +307,7 @@ ntumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap",
     },
     update = function(cost, Y) {
       P <- cost$P
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
@@ -363,7 +362,7 @@ rklsne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       sumW <- sum(W)
@@ -418,7 +417,7 @@ jssne <- function(perplexity, inp_kernel = "gaussian",
       eps <- cost$eps
       P <- cost$P
 
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       sumW <- sum(W)
@@ -483,7 +482,7 @@ chsne <- function(perplexity, inp_kernel = "gaussian",
     update = function(cost, Y) {
       P <- cost$P
 
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -546,7 +545,7 @@ hlsne <- function(perplexity, inp_kernel = "gaussian",
     update = function(cost, Y) {
       P <- cost$P
 
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -567,7 +566,7 @@ hlsne <- function(perplexity, inp_kernel = "gaussian",
 # alpha-beta divergence
 absne <- function(perplexity, inp_kernel = "gaussian",
                   symmetrize = "symmetric", alpha = 1, lambda = 1,
-                  eps = .Machine$double.eps, n_threads = 0) {
+                  eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) {
   beta <- lambda - alpha
 
   eps0 <- 1e-5
@@ -634,7 +633,7 @@ absne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -706,7 +705,7 @@ absneb0 <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -776,7 +775,7 @@ absneamb <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -845,7 +844,7 @@ absnea0 <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -913,7 +912,7 @@ absne00 <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       Z <- sum(W)
@@ -990,7 +989,8 @@ abssne <- function(perplexity, inp_kernel = "gaussian",
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE)$Q
+      Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE,
+                use_cpp = use_cpp, n_threads = n_threads)$Q
       cost$PaQb <- cost$Pa * powm(Q, beta, eps)
       cost$PaQbc <- colSums(cost$PaQb)
@@ -1066,7 +1066,7 @@ gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       What <- 1 + W
       W <- 1 / What
       diag(W) <- 0
@@ -1416,7 +1416,7 @@ ee <- function(perplexity, lambda = 100, neg_weights = TRUE,
       res
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- exp(-W)
       diag(W) <- 0
       cost$W <- W
@@ -1483,7 +1483,8 @@ nerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, eps, is_symmetric = TRUE)$Q
+      Q <- expQ(Y, eps, is_symmetric = TRUE, use_cpp = use_cpp,
+                n_threads = n_threads)$Q
       cost$Q <- Q
 
       # Reverse KL gradient
@@ -1502,7 +1503,7 @@ nerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
 
 snerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
                   symmetrize = "symmetric", eps = .Machine$double.eps,
-                  n_threads = 0) {
+                  n_threads = 0, use_cpp = FALSE) {
   lambda4 <- 4 * lambda
   oml <- 1 - lambda
   oml4 <- 4 * oml
@@ -1539,7 +1540,8 @@ snerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian",
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, eps, is_symmetric = TRUE, matrix_normalize = TRUE)$Q
+      Q <- expQ(Y, eps, is_symmetric = TRUE, matrix_normalize = TRUE,
+                use_cpp = use_cpp, n_threads = n_threads)$Q
       cost$Q <- Q
 
       # Reverse KL gradient
@@ -1610,7 +1612,8 @@ jse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian",
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, eps = eps, is_symmetric = TRUE)$Q
+      Q <- expQ(Y, eps = eps, is_symmetric = TRUE, use_cpp = use_cpp,
+                n_threads = n_threads)$Q
 
       Z <- kappa * cost$P + om_kappa * Q
       Z[Z < eps] <- eps
@@ -1630,7 +1633,7 @@ jse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian",
 
 sjse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian",
                  symmetrize = "symmetric", eps = .Machine$double.eps,
-                 n_threads = 0) {
+                 n_threads = 0, use_cpp = FALSE) {
   eps0 <- 1e-5
   kappa <- max(kappa, eps0)
   kappa <- min(kappa, 1 - eps0)
@@ -1642,7 +1645,8 @@ sjse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian",
   lreplace(
     ssne(perplexity = perplexity, inp_kernel = inp_kernel,
-         symmetrize = symmetrize, eps = eps, n_threads = n_threads),
+         symmetrize = symmetrize, eps = eps, n_threads = n_threads,
+         use_cpp = use_cpp),
     pfn = function(cost, Y) {
       cost <- cost_update(cost, Y)
       eps <- cost$eps
@@ -1663,7 +1667,8 @@ sjse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian",
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE)$Q
+      Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE,
+                use_cpp = use_cpp, n_threads = n_threads)$Q
       Z <- kappa * cost$P + om_kappa * Q
       Z[Z < eps] <- eps
       diag(Z) <- 0
@@ -1758,7 +1763,8 @@ bnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps,
     update = function(cost, Y) {
       eps <- cost$eps
 
-      Q <- expQ(Y, eps, beta = cost$beta, is_symmetric = FALSE)$Q
+      Q <- expQ(Y, eps, beta = cost$beta, is_symmetric = FALSE,
+                use_cpp = use_cpp, n_threads = n_threads)$Q
       cost$Q <- Q
 
       # Reverse KL gradient
diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index 55b2665..a3e7f27 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -893,54 +893,54 @@ smallvis <- function(X, k = 2, scale = "absmax",
   if (is.character(method)) {
     method <- match.arg(tolower(method), method_names)
     cost_fn <- switch(method,
-      tsne = tsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      umap = umap(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      largevis = largevis(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      tumap = tumap(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      ntumap = ntumap(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      tsne = tsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      umap = umap(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      largevis = largevis(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      tumap = tumap(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      ntumap = ntumap(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       mmds = mmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       gmmds = gmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
-      asne = asne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      ssne = ssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      wtsne = wtsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      wssne = wssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      hssne = hssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      asne = asne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      ssne = ssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      wtsne = wtsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      wssne = wssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      hssne = hssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       ee = ee(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
-      nerv = nerv(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      snerv = snerv(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      jse = jse(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      sjse = sjse(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      nerv = nerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      snerv = snerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      jse = jse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      sjse = sjse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       smmds = smmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       sammon = sammon(n_threads = n_threads, eps = eps, use_cpp = use_cpp),
-      tasne = tasne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      trmsne = trmsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      trsrsne = trsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      tmsne = tmsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      arsrsne = arsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      rsrjse = rsrjse(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      rsrnerv = rsrnerv(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      btsne = btsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      bssne = bssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      basne = basne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      btasne = btasne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      bnerv = bnerv(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      tasne = tasne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      trmsne = trmsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      trsrsne = trsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      tmsne = tmsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      arsrsne = arsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      rsrjse = rsrjse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      rsrnerv = rsrnerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      btsne = btsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      bssne = bssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      basne = basne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      btasne = btasne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      bnerv = bnerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       ballmmds = ballmmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       knnmmds = knnmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
-      dhssne = dhssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      tsneu = tsneu(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      pstsne = pstsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      skdtsne = skdtsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      usne = usne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      cetsne = cetsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      tee = tee(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      absne = absne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      chsne = chsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      hlsne = hlsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      dhssne = dhssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      tsneu = tsneu(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      pstsne = pstsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      skdtsne = skdtsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      usne = usne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      cetsne = cetsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      tee = tee(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      absne = absne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      chsne = chsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      hlsne = hlsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       gsne = gsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
-      rklsne = rklsne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      jssne = jssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      abssne = abssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
-      bhssne = bhssne(perplexity = perplexity, n_threads = n_threads, eps = eps),
+      rklsne = rklsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      jssne = jssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      abssne = abssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
+      bhssne = bhssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp),
       stop("BUG: someone forgot to implement option: '", method, "'")
     )
   }
@@ -2564,6 +2564,15 @@ safe_dist2 <- function(X) {
   D2
 }
 
+calc_tweight <- function(D2, use_cpp = FALSE, n_threads = 1) {
+  if (use_cpp) {
+    tweight_cpp(D2, n_threads = n_threads)
+  }
+  else {
+    1 / (1 + D2)
+  }
+}
+
 # 2-norm of a vector or matrix
 norm2 <- function(X) {
   sqrt(sum(X * X))
diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R
index 741aeb4..af0ebc9 100644
--- a/smallvis/R/sne.R
+++ b/smallvis/R/sne.R
@@ -14,10 +14,12 @@ exp_shift <- function(X) {
 }
 
 expQ <- function(Y, eps = .Machine$double.eps, beta = NULL,
-                  A = NULL,
-                  is_symmetric = FALSE,
-                  matrix_normalize = FALSE) {
-  W <- dist2(Y)
+                 A = NULL,
+                 is_symmetric = FALSE,
+                 matrix_normalize = FALSE,
+                 use_cpp = FALSE,
+                 n_threads = 1) {
+  W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
 
   if (!is.null(beta)) {
     W <- exp_shift(-W * beta)
@@ -126,8 +128,9 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
       res
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
-      W <- 1 / (1 + W)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      # not faster to use threading
+      W <- calc_tweight(W, use_cpp = FALSE, n_threads = n_threads)
       diag(W) <- 0
 
       cost$Z <- sum(W)
@@ -160,8 +163,8 @@ ssne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      cost$Q <- expQ(Y, cost$eps, is_symmetric = TRUE,
-                     matrix_normalize = TRUE)$Q
+      cost$Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE,
+                     use_cpp = use_cpp, n_threads = n_threads)$Q
       cost
     },
     sentinel = "Q"
@@ -191,7 +194,8 @@ asne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      cost$Q <- expQ(Y, eps = cost$eps, is_symmetric = FALSE)$Q
+      cost$Q <- expQ(Y, eps = cost$eps, is_symmetric = FALSE,
+                     use_cpp = use_cpp, n_threads = n_threads)$Q
       cost
     },
     sentinel = "Q"
@@ -204,7 +208,7 @@ asne <- function(perplexity, inp_kernel = "gaussian",
 # In \emph{Advances in neural information processing systems} (pp. 2169-2177).
 hssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian",
                   symmetrize = "symmetric", eps = .Machine$double.eps,
-                  n_threads = 0) {
+                  n_threads = 0, use_cpp = FALSE) {
   alpha <- max(alpha, 1e-8)
   apow <- -1 / alpha
   lreplace(
@@ -219,7 +223,7 @@ hssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       # to include bandwidth
       # W <- (alpha * beta * W + 1) ^ (-1 / alpha)
       W <- powm(alpha * W + 1, apow, cost$eps)
@@ -264,7 +268,7 @@ bhssne <- function(perplexity, alpha = 0.5, beta = 1,
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- powm(cost$ab * W + 1, cost$apow, cost$eps)
       diag(W) <- 0
 
@@ -278,7 +282,7 @@ bhssne <- function(perplexity, alpha = 0.5, beta = 1,
 # A version of HSSNE where alpha is allowed to vary at every epoch
 dhssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian",
                    symmetrize = "symmetric", eps = .Machine$double.eps,
-                   n_threads = 0) {
+                   n_threads = 0, use_cpp = FALSE) {
   alpha_min <- 1e-8
   alpha <- max(alpha, alpha_min)
   lreplace(
@@ -322,7 +326,7 @@ dhssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian",
     },
     update = function(cost, Y) {
       alpha <- cost$alpha
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- powm(alpha * W + 1, cost$apow, cost$eps)
       diag(W) <- 0
 
@@ -370,7 +374,7 @@ wtsne <- function(perplexity, inp_kernel = "gaussian",
     update = function(cost, Y) {
       M <- cost$M
 
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- M / (1 + W)
       diag(W) <- 0
 
@@ -411,7 +415,8 @@ wssne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      cost$Q <- expQ(Y, cost$eps, A = cost$M, matrix_normalize = TRUE)$Q
+      cost$Q <- expQ(Y, cost$eps, A = cost$M, matrix_normalize = TRUE,
+                     use_cpp = use_cpp, n_threads = n_threads)$Q
       cost
     }
   )
@@ -488,7 +493,7 @@ pstsne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
 
@@ -536,7 +541,7 @@ tee <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
       cost$W <- W
@@ -563,9 +568,10 @@ tee <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
 # UMAP/t-SNE Hybrids ------------------------------------------------------
 
 # Calculate P via normalized smooth knn-distances
-skdtsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0) {
+skdtsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0,
+                    use_cpp = FALSE) {
   tsne(perplexity = perplexity, inp_kernel = "skd", symmetrize = "umap",
-       eps = eps, n_threads = n_threads)
+       eps = eps, n_threads = n_threads, use_cpp = use_cpp)
 }
 
 # Use the UMAP curve family in output kernel
@@ -585,7 +591,7 @@ usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
       cost
     },
     update = function(cost, Y) {
-      D2 <- dist2(Y)
+      D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       D2[D2 < 0] <- 0
 
       W <- 1 / (1 + cost$a * D2 ^ cost$b)
@@ -643,7 +649,7 @@ cetsne <- function(perplexity, inp_kernel = "gaussian",
     },
     update = function(cost, Y) {
       P <- cost$P
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
@@ -689,7 +695,7 @@ btsne <- function(perplexity, inp_kernel = "gaussian", beta = NULL,
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + (cost$beta * W))
       diag(W) <- 0
 
@@ -725,7 +731,8 @@ bssne <- function(perplexity, inp_kernel = "gaussian", beta = NULL,
       cost
     },
     update = function(cost, Y) {
-      cost$Q <- expQ(Y, cost$eps, beta = cost$beta, matrix_normalize = TRUE)$Q
+      cost$Q <- expQ(Y, cost$eps, beta = cost$beta, matrix_normalize = TRUE,
+                     use_cpp = use_cpp, n_threads = n_threads)$Q
       cost
     }
   )
@@ -752,7 +759,8 @@ basne <- function(perplexity, beta = NULL, eps = .Machine$double.eps,
       cost
     },
     update = function(cost, Y) {
-      cost$Q <- expQ(Y, eps = cost$eps, beta = cost$beta, is_symmetric = FALSE)$Q
+      cost$Q <- expQ(Y, eps = cost$eps, beta = cost$beta, is_symmetric = FALSE,
+                     use_cpp = use_cpp, n_threads = n_threads)$Q
       cost
     },
     gr = function(cost, Y) {
@@ -765,9 +773,9 @@ basne <- function(perplexity, beta = NULL, eps = .Machine$double.eps,
 
 # t-ASNE with input kernel bandwidths transferred to output
 btasne <- function(perplexity, beta = NULL, eps = .Machine$double.eps,
-                   n_threads = 0) {
+                   n_threads = 0, use_cpp = FALSE) {
   lreplace(basne(perplexity = perplexity, beta = beta, eps = eps,
-                 n_threads = n_threads),
+                 n_threads = n_threads, use_cpp = use_cpp),
     pfn = kl_cost,
     gr = function(cost, Y) {
       cost <- cost_update(cost, Y)
@@ -777,7 +785,7 @@ btasne <- function(perplexity, beta = NULL, eps = .Machine$double.eps,
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + cost$beta * W)
       diag(W) <- 0
 
@@ -808,7 +816,7 @@ tasne <- function(perplexity, n_threads = 0, use_cpp = FALSE) {
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
 
@@ -843,7 +851,7 @@ tasne <- function(perplexity, inp_kernel = "gaussian",
       cost
     },
     update = function(cost, Y) {
-      W <- dist2(Y)
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
       W <- 1 / (1 + W)
       diag(W) <- 0
 
diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp
index 8c05cba..8f8e016 100644
--- a/smallvis/src/RcppExports.cpp
+++ b/smallvis/src/RcppExports.cpp
@@ -34,10 +34,23 @@ BEGIN_RCPP
   return rcpp_result_gen;
 END_RCPP
 }
+// tweight_cpp
+NumericMatrix tweight_cpp(NumericMatrix dist_matrix, int n_threads);
+RcppExport SEXP _smallvis_tweight_cpp(SEXP dist_matrixSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type dist_matrix(dist_matrixSEXP);
+    Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(tweight_cpp(dist_matrix, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
     {"_smallvis_dist_cpp", (DL_FUNC) &_smallvis_dist_cpp, 2},
+    {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2},
     {NULL, NULL, 0}
 };
 
diff --git a/smallvis/src/matrix.cpp b/smallvis/src/matrix.cpp
index 5ce07d3..c954e70 100644
--- a/smallvis/src/matrix.cpp
+++ b/smallvis/src/matrix.cpp
@@ -42,6 +42,13 @@ void dist(const std::vector<double> &data, std::vector<double> &dist_matrix,
   }
 }
 
+void tweight(const std::vector<double> &dist_matrix,
+             std::vector<double> &transformed_matrix, int start, int end) {
+  for (int idx = start; idx < end; ++idx) {
+    transformed_matrix[idx] = 1.0 / (1.0 + dist_matrix[idx]);
+  }
+}
+
 // [[Rcpp::export]]
 NumericMatrix dist2_cpp(NumericMatrix input, std::size_t n_threads = 1) {
   if (n_threads == 0) {
@@ -114,3 +121,33 @@ NumericMatrix dist_cpp(NumericMatrix input, std::size_t n_threads = 1) {
 
   return result;
 }
+
+// [[Rcpp::export]]
+NumericMatrix tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
+  int n = dist_matrix.nrow();
+  int total_elements = n * n;
+
+  std::vector<double> dist_matrix_vec(dist_matrix.begin(), dist_matrix.end());
+
+  std::vector<double> transformed_matrix(total_elements, 0.0);
+
+  int chunk_size = (total_elements + n_threads - 1) / n_threads;
+
+  std::vector<std::thread> threads;
+  for (int t = 0; t < n_threads; ++t) {
+    int start = t * chunk_size;
+    int end = std::min(start + chunk_size, total_elements);
+    threads.emplace_back(tweight, std::cref(dist_matrix_vec),
+                         std::ref(transformed_matrix), start, end);
+  }
+
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  NumericMatrix result(n, n);
+  std::copy(transformed_matrix.begin(), transformed_matrix.end(),
+            result.begin());
+
+  return result;
+}

From a6f3ecd7ce6f3202f245e2e22ecbe19986b15d14 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sat, 27 Jul 2024 17:41:46 -0700
Subject: [PATCH 03/26] combine D2/W

---
 smallvis/R/RcppExports.R     |  8 +++--
 smallvis/R/smallvis.R        | 14 ++++++--
 smallvis/R/sne.R             |  4 +--
 smallvis/src/RcppExports.cpp | 19 +++++++++--
 smallvis/src/matrix.cpp      | 62 ++++++++++++++++++++++++++++++++++--
 5 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R
index 5efe2ae..7ad2d3c 100644
--- a/smallvis/R/RcppExports.R
+++ b/smallvis/R/RcppExports.R
@@ -9,7 +9,11 @@ dist_cpp <- function(input, n_threads = 1L) {
   .Call(`_smallvis_dist_cpp`, input, n_threads)
 }
 
-tweight_cpp <- function(dist_matrix, n_threads) {
-  .Call(`_smallvis_tweight_cpp`, dist_matrix, n_threads)
+tweight_cpp <- function(input, n_threads = 1L) {
+  .Call(`_smallvis_tweight_cpp`, input, n_threads)
+}
+
+d2_to_tweight_cpp <- function(dist_matrix, n_threads) {
+  .Call(`_smallvis_d2_to_tweight_cpp`, dist_matrix, n_threads)
 }
 
diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index a3e7f27..b739642 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -2564,15 +2564,25 @@ safe_dist2 <- function(X) {
   D2
 }
 
-calc_tweight <- function(D2, use_cpp = FALSE, n_threads = 1) {
+calc_d2tweight <- function(D2, use_cpp = FALSE, n_threads = 1) {
   if (use_cpp) {
-    tweight_cpp(D2, n_threads = n_threads)
+    d2_to_tweight_cpp(D2, n_threads = n_threads)
   }
   else {
     1 / (1 + D2)
   }
 }
 
+calc_tweight <- function(X, use_cpp = FALSE, n_threads = 1) {
+  if (use_cpp) {
+    tweight_cpp(X, n_threads = n_threads)
+  }
+  else {
+    D2 <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads)
+    1 / (1 + D2)
+  }
+}
+
 # 2-norm of a vector or matrix
 norm2 <- function(X) {
   sqrt(sum(X * X))
diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R
index af0ebc9..59302e4 100644
--- a/smallvis/R/sne.R
+++ b/smallvis/R/sne.R
@@ -128,9 +128,7 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
       res
     },
     update = function(cost, Y) {
-      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
-      # not faster to use threading
-      W <- calc_tweight(W, use_cpp = FALSE, n_threads = n_threads)
+      W <- calc_tweight(Y, use_cpp = use_cpp, n_threads = n_threads)
       diag(W) <- 0
 
       cost$Z <- sum(W)
diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp
index 8f8e016..9107744 100644
--- a/smallvis/src/RcppExports.cpp
+++ b/smallvis/src/RcppExports.cpp
@@ -35,14 +35,26 @@ BEGIN_RCPP
 END_RCPP
 }
 // tweight_cpp
-NumericMatrix tweight_cpp(NumericMatrix dist_matrix, int n_threads);
-RcppExport SEXP _smallvis_tweight_cpp(SEXP dist_matrixSEXP, SEXP n_threadsSEXP) {
+NumericMatrix tweight_cpp(NumericMatrix input, std::size_t n_threads);
+RcppExport SEXP _smallvis_tweight_cpp(SEXP inputSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericMatrix >::type input(inputSEXP);
+    Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(tweight_cpp(input, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
+// d2_to_tweight_cpp
+NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads);
+RcppExport SEXP _smallvis_d2_to_tweight_cpp(SEXP dist_matrixSEXP, SEXP n_threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< NumericMatrix >::type dist_matrix(dist_matrixSEXP);
     Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(tweight_cpp(dist_matrix, n_threads));
+    rcpp_result_gen = Rcpp::wrap(d2_to_tweight_cpp(dist_matrix, n_threads));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -51,6 +63,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
     {"_smallvis_dist_cpp", (DL_FUNC) &_smallvis_dist_cpp, 2},
     {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2},
+    {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2},
     {NULL, NULL, 0}
 };
 
diff --git a/smallvis/src/matrix.cpp b/smallvis/src/matrix.cpp
index c954e70..bb10230 100644
--- a/smallvis/src/matrix.cpp
+++ b/smallvis/src/matrix.cpp
@@ -6,6 +6,24 @@
 
 using namespace Rcpp;
 
+void tweight(const std::vector<double> &data, std::vector<double> &dist_matrix,
+             std::size_t start_row, std::size_t end_row, std::size_t n,
+             std::size_t d) {
+  for (std::size_t i = start_row; i < end_row; ++i) {
+    const std::size_t i_d = i * d;
+    const std::size_t i_n = i * n;
+    for (std::size_t j = 0; j < n; ++j) {
+      const std::size_t j_d = j * d;
+      double dist = 0.0;
+      for (std::size_t k = 0; k < d; ++k) {
+        double diff = data[i_d + k] - data[j_d + k];
+        dist += diff * diff;
+      }
+      dist_matrix[i_n + j] = 1.0 / (1.0 + dist);
+    }
+  }
+}
+
 void d2(const std::vector<double> &data, std::vector<double> &dist_matrix,
         std::size_t start_row, std::size_t end_row, std::size_t n,
         std::size_t d) {
@@ -42,7 +60,7 @@ void dist(const std::vector<double> &data, std::vector<double> &dist_matrix,
   }
 }
 
-void tweight(const std::vector<double> &dist_matrix,
+void d2_to_tweight(const std::vector<double> &dist_matrix,
              std::vector<double> &transformed_matrix, int start, int end) {
   for (int idx = start; idx < end; ++idx) {
     transformed_matrix[idx] = 1.0 / (1.0 + dist_matrix[idx]);
@@ -122,8 +140,46 @@ NumericMatrix dist_cpp(NumericMatrix input, std::size_t n_threads = 1) {
   return result;
 }
 
+
+// [[Rcpp::export]]
+NumericMatrix tweight_cpp(NumericMatrix input, std::size_t n_threads = 1) {
+  if (n_threads == 0) {
+    n_threads = 1;
+  }
+  std::size_t n = input.nrow();
+  std::size_t d = input.ncol();
+
+  std::vector<double> transposed_data(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      transposed_data[i * d + j] = input(i, j);
+    }
+  }
+
+  std::vector<double> dist_matrix(n * n, 0.0);
+
+  std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+
+  std::vector<std::thread> threads;
+  for (std::size_t t = 0; t < n_threads; ++t) {
+    std::size_t start_row = t * chunk_size;
+    std::size_t end_row = std::min(start_row + chunk_size, n);
+    threads.emplace_back(tweight, std::cref(transposed_data),
+                         std::ref(dist_matrix), start_row, end_row, n, d);
+  }
+
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  NumericMatrix result(n, n);
+  std::copy(dist_matrix.begin(), dist_matrix.end(), result.begin());
+
+  return result;
+}
+
 // [[Rcpp::export]]
-NumericMatrix tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
+NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
   int n = dist_matrix.nrow();
   int total_elements = n * n;
@@ -137,7 +193,7 @@ NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
   for (int t = 0; t < n_threads; ++t) {
     int start = t * chunk_size;
     int end = std::min(start + chunk_size, total_elements);
-    threads.emplace_back(tweight, std::cref(dist_matrix_vec),
+    threads.emplace_back(d2_to_tweight, std::cref(dist_matrix_vec),
                          std::ref(transformed_matrix), start, end);
   }
 

From c73379eba7114ac126d59263a22a08b576a96449 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sat, 27 Jul 2024 18:05:40 -0700
Subject: [PATCH 04/26] check fixes

---
 smallvis/DESCRIPTION          | 2 +-
 smallvis/NAMESPACE            | 1 +
 smallvis/R/smallvis-package.R | 1 +
 smallvis/R/smallvis.R         | 8 ++++----
 smallvis/man/smallvis.Rd      | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/smallvis/DESCRIPTION b/smallvis/DESCRIPTION
index cb379a1..349fa2f 100644
--- a/smallvis/DESCRIPTION
+++ b/smallvis/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: smallvis
 Type: Package
 Title: Small Scale Neighborhood Embedding Algorithms
-Version: 0.0.0.9000
+Version: 0.0.1.9000
 Authors@R: person("James", "Melville", email = "jlmelville@gmail.com", role =
     c("aut", "cre"))
 Description: Neighborhood embedding methods for small scale visualization, such
diff --git a/smallvis/NAMESPACE b/smallvis/NAMESPACE
index f44dae6..c801172 100644
--- a/smallvis/NAMESPACE
+++ b/smallvis/NAMESPACE
@@ -3,4 +3,5 @@
 export(smallvis)
 export(smallvis_perpstep)
 export(smallvis_rep)
+importFrom(Rcpp,sourceCpp)
 useDynLib(smallvis, .registration = TRUE)
diff --git a/smallvis/R/smallvis-package.R b/smallvis/R/smallvis-package.R
index 37a033d..73cd7c4 100644
--- a/smallvis/R/smallvis-package.R
+++ b/smallvis/R/smallvis-package.R
@@ -1,4 +1,5 @@
 ## usethis namespace: start
 #' @useDynLib smallvis, .registration = TRUE
+#' @importFrom Rcpp sourceCpp
 ## usethis namespace: end
 NULL
\ No newline at end of file
diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index b739642..e34476a 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -621,7 +621,7 @@
 #' tsne_iris <- smallvis(iris, perplexity = 50, verbose = TRUE)
 #'
 #' # Can use a custom epoch_callback for visualization
-#' colors = rainbow(length(unique(iris$Species)))
+#' colors = grDevices::rainbow(length(unique(iris$Species)))
 #' names(colors) = unique(iris$Species)
 #' ecb = function(x, y) {
 #'   plot(x, t = 'n')
@@ -993,7 +993,7 @@ smallvis <- function(X, k = 2, scale = "absmax",
     }
   }
 
-  if (class(pca) == "character" && pca == "whiten") {
+  if (methods::is(pca, "character") && pca == "whiten") {
    pca <- TRUE
    whiten <- TRUE
   }
@@ -1891,7 +1891,7 @@ make_smallvis_cb <- function(df) {
   palette <- NULL
   function(Y, iter, cost = NULL) {
     if (is.null(palette)) {
-      palette <- vizier:::color_helper(df, color_scheme = rainbow)$palette
+      palette <- vizier:::color_helper(df, color_scheme = grDevices::rainbow)$palette
     }
     title <- paste0("iter: ", iter)
     if (!(is.null(cost) || is.na(cost))) {
@@ -2687,7 +2687,7 @@ find_ab_params <- function(spread = 1, min_dist = 0.001) {
       stats::nls(yv ~ 1 / (1 + a * xv ^ (2 * b)),
                 start = list(a = 1, b = 1))$m$getPars()
     }, silent = TRUE)
 
-  if (class(result) == "try-error") {
+  if (methods::is(result, "try-error")) {
     stop("Can't find a, b for provided spread/min_dist values")
   }
   result
diff --git a/smallvis/man/smallvis.Rd b/smallvis/man/smallvis.Rd
index b3afb4c..cc4f19b 100644
--- a/smallvis/man/smallvis.Rd
+++ b/smallvis/man/smallvis.Rd
@@ -714,7 +714,7 @@ function with these optimizers.
 tsne_iris <- smallvis(iris, perplexity = 50, verbose = TRUE)
 
 # Can use a custom epoch_callback for visualization
-colors = rainbow(length(unique(iris$Species)))
+colors = grDevices::rainbow(length(unique(iris$Species)))
 names(colors) = unique(iris$Species)
 ecb = function(x, y) {
   plot(x, t = 'n')

From 954bbc6d8119a05675ddf756a65b302a9d0a64a4 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sun, 28 Jul 2024 14:44:41 -0700
Subject: [PATCH 05/26] C++ multi-threaded t-SNE grad

---
 smallvis/DESCRIPTION                 |  2 +-
 smallvis/R/RcppExports.R             |  4 ++
 smallvis/R/sne.R                     |  8 ++-
 smallvis/src/RcppExports.cpp         | 16 +++++
 smallvis/src/matrix.cpp              | 96 ++++++++++++++++++++++++----
 smallvis/tests/testthat/helper_api.R |  4 +-
 smallvis/tests/testthat/test_api.R   |  9 +++
 7 files changed, 123 insertions(+), 16 deletions(-)

diff --git a/smallvis/DESCRIPTION b/smallvis/DESCRIPTION
index 349fa2f..7470b56 100644
--- a/smallvis/DESCRIPTION
+++ b/smallvis/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: smallvis
 Type: Package
 Title: Small Scale Neighborhood Embedding Algorithms
-Version: 0.0.1.9000
+Version: 0.0.1.9001
 Authors@R: person("James", "Melville", email = "jlmelville@gmail.com", role =
     c("aut", "cre"))
 Description: Neighborhood embedding methods for small scale visualization, such
diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R
index 7ad2d3c..05e26d5 100644
--- a/smallvis/R/RcppExports.R
+++ b/smallvis/R/RcppExports.R
@@ -17,3 +17,7 @@ d2_to_tweight_cpp <- function(dist_matrix, n_threads) {
   .Call(`_smallvis_d2_to_tweight_cpp`, dist_matrix, n_threads)
 }
 
+tsne_grad_cpp <- function(P, W, Z, Y, n_threads) {
+  .Call(`_smallvis_tsne_grad_cpp`, P, W, Z, Y, n_threads)
+}
+
diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R
index 59302e4..02647fc 100644
--- a/smallvis/R/sne.R
+++ b/smallvis/R/sne.R
@@ -112,8 +112,12 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric",
     gr = function(cost, Y) {
       cost <- cost_update(cost, Y)
       P <- cost$P
-      cost$G <- k2g(Y, 4 * cost$W * (P - cost$W / cost$Z))
-
+      if (use_cpp) {
+        cost$G <- tsne_grad_cpp(P, cost$W, cost$Z, Y, n_threads = n_threads)
+      }
+      else {
+        cost$G <- k2g(Y, 4 * cost$W * (P - cost$W / cost$Z))
+      }
       cost
     },
     export = function(cost, val) {
diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp
index 9107744..5ac503a 100644
--- a/smallvis/src/RcppExports.cpp
+++ b/smallvis/src/RcppExports.cpp
@@ -58,12 +58,28 @@ BEGIN_RCPP
   return rcpp_result_gen;
 END_RCPP
 }
+// tsne_grad_cpp
+NumericMatrix tsne_grad_cpp(const NumericMatrix& P, const NumericMatrix& W, double Z, const NumericMatrix& Y, std::size_t n_threads);
+RcppExport SEXP _smallvis_tsne_grad_cpp(SEXP PSEXP, SEXP WSEXP, SEXP ZSEXP, SEXP YSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const NumericMatrix& >::type P(PSEXP);
+    Rcpp::traits::input_parameter< const NumericMatrix& >::type W(WSEXP);
+    Rcpp::traits::input_parameter< double >::type Z(ZSEXP);
+    Rcpp::traits::input_parameter< const NumericMatrix& >::type Y(YSEXP);
+    Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(tsne_grad_cpp(P, W, Z, Y, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
     {"_smallvis_dist_cpp", (DL_FUNC) &_smallvis_dist_cpp, 2},
     {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2},
     {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2},
+    {"_smallvis_tsne_grad_cpp", (DL_FUNC) &_smallvis_tsne_grad_cpp, 5},
     {NULL, NULL, 0}
 };
 
diff --git a/smallvis/src/matrix.cpp b/smallvis/src/matrix.cpp
index bb10230..5febb97 100644
--- a/smallvis/src/matrix.cpp
+++ b/smallvis/src/matrix.cpp
@@ -6,9 +6,32 @@
 
 using namespace Rcpp;
 
+void tsne_grad(const std::vector<double> &P, const std::vector<double> &W,
+               double Z, const std::vector<double> &Y,
+               std::vector<double> &gradient, std::size_t start,
+               std::size_t end, std::size_t n, std::size_t d) {
+
+  const double Z4 = 4.0 / Z;
+
+  for (std::size_t i = start; i < end; ++i) {
+    const std::size_t i_d = i * d;
+    const std::size_t i_n = i * n;
+    for (std::size_t j = 0; j < n; ++j) {
+      if (i == j) {
+        continue;
+      }
+      const std::size_t ij = i_n + j;
+      double k_ij = Z4 * W[ij] * (Z * P[ij] - W[ij]);
+      for (std::size_t k = 0; k < d; ++k) {
+        gradient[i_d + k] += k_ij * (Y[i_d + k] - Y[j * d + k]);
+      }
+    }
+  }
+}
+
 void tweight(const std::vector<double> &data, std::vector<double> &dist_matrix,
-             std::size_t start_row, std::size_t end_row, std::size_t n,
-             std::size_t d) {
+             std::size_t start_row, std::size_t end_row, std::size_t n,
+             std::size_t d) {
   for (std::size_t i = start_row; i < end_row; ++i) {
     const std::size_t i_d = i * d;
     const std::size_t i_n = i * n;
@@ -61,7 +84,8 @@ void dist(const std::vector<double> &data, std::vector<double> &dist_matrix,
 }
 
 void d2_to_tweight(const std::vector<double> &dist_matrix,
-                   std::vector<double> &transformed_matrix, int start, int end) {
+                   std::vector<double> &transformed_matrix, int start,
+                   int end) {
   for (int idx = start; idx < end; ++idx) {
     transformed_matrix[idx] = 1.0 / (1.0 + dist_matrix[idx]);
@@ -140,7 +164,6 @@ NumericMatrix dist_cpp(NumericMatrix input, std::size_t n_threads = 1) {
   return result;
 }
 
-
 // [[Rcpp::export]]
 NumericMatrix tweight_cpp(NumericMatrix input, std::size_t n_threads = 1) {
   if (n_threads == 0) {
@@ -148,18 +171,18 @@ NumericMatrix tweight_cpp(NumericMatrix input, std::size_t n_threads = 1) {
   }
   std::size_t n = input.nrow();
   std::size_t d = input.ncol();
-  
+
   std::vector<double> transposed_data(n * d);
   for (std::size_t i = 0; i < n; ++i) {
     for (std::size_t j = 0; j < d; ++j) {
       transposed_data[i * d + j] = input(i, j);
     }
   }
-  
+
   std::vector<double> dist_matrix(n * n, 0.0);
-  
+
   std::size_t chunk_size = (n + n_threads - 1) / n_threads;
-  
+
   std::vector<std::thread> threads;
   for (std::size_t t = 0; t < n_threads; ++t) {
     std::size_t start_row = t * chunk_size;
@@ -167,19 +190,22 @@ NumericMatrix tweight_cpp(NumericMatrix input, std::size_t n_threads = 1) {
     threads.emplace_back(tweight, std::cref(transposed_data),
                          std::ref(dist_matrix), start_row, end_row, n, d);
   }
-  
+
   for (auto &thread : threads) {
     thread.join();
   }
-  
+
   NumericMatrix result(n, n);
   std::copy(dist_matrix.begin(), dist_matrix.end(), result.begin());
-  
+
   return result;
 }
 
 // [[Rcpp::export]]
 NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
+  if (n_threads == 0) {
+    n_threads = 1;
+  }
   int n = dist_matrix.nrow();
   int total_elements = n * n;
 
@@ -207,3 +233,51 @@ NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
 
   return result;
 }
+
+// [[Rcpp::export]]
+NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
+                            const NumericMatrix &W, double Z,
+                            const NumericMatrix &Y,
+                            std::size_t n_threads) {
+  std::size_t n = Y.nrow();
+  std::size_t d = Y.ncol();
+
+  std::vector<double> P_vec(P.begin(), P.end());
+  std::vector<double> W_vec(W.begin(), W.end());
+
+  std::vector<double> Y_vec(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      Y_vec[i * d + j] = Y(i, j);
+    }
+  }
+
+  std::vector<double> gradient_vec(n * d, 0.0);
+
+  if (n_threads > 1) {
+    std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+    std::vector<std::thread> threads;
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start_row = t * chunk_size;
+      std::size_t end_row = std::min(start_row + chunk_size, n);
+      threads.emplace_back(tsne_grad, std::cref(P_vec), std::cref(W_vec), Z,
+                           std::cref(Y_vec), std::ref(gradient_vec), start_row,
+                           end_row, n, d);
+    }
+    for (auto &thread : threads) {
+      thread.join();
+    }
+  } else {
+    tsne_grad(P_vec, W_vec, Z, Y_vec, gradient_vec, 0, n, n, d);
+  }
+
+  NumericMatrix gradient(n, d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      gradient(i, j) = gradient_vec[i * d + j];
+    }
+  }
+
+  return gradient;
+}
+
diff --git a/smallvis/tests/testthat/helper_api.R b/smallvis/tests/testthat/helper_api.R
index 46e9317..6b679ad 100644
--- a/smallvis/tests/testthat/helper_api.R
+++ b/smallvis/tests/testthat/helper_api.R
@@ -1,8 +1,8 @@
-expect_api <- function(method, Y, cost, X = iris10) {
+expect_api <- function(method, Y, cost, X = iris10, use_cpp = FALSE) {
   res <- smallvis(X, Y_init = iris10_Y, method = method, eta = 0.1,
                   perplexity = 5, epoch_callback = NULL, verbose = FALSE,
-                  ret_extra = TRUE)
+                  ret_extra = TRUE, use_cpp = use_cpp)
   expect_equal(res$Y, c2y(Y), tolerance = 1e-3, info = paste0(method[[1]], " Y"))
   expect_equal(final_cost(res), cost, tolerance = 1e-4, info = paste0(method[[1]], " cost"))
 }
diff --git a/smallvis/tests/testthat/test_api.R b/smallvis/tests/testthat/test_api.R
index a1db4f1..1ae7e7e 100644
--- a/smallvis/tests/testthat/test_api.R
+++ b/smallvis/tests/testthat/test_api.R
@@ -512,6 +512,15 @@ test_that("Miscellany", {
                       0.04283, 1.227, -0.2283, -2.662, -1.389, 2.117, 1.291,
                       -1.949, -0.4529, 2.002),
             X = ui10, cost = 0.1442)
+  expect_api(method = "tsne", Y = c(-4.617, 2.008, -0.8907, 4.044, 0.6146, -3.478, 0.4454,
+                                    1.967, 4.82, -4.913, 0.5231, 0.837, 0.02866, -1.871, -1.438,
+                                    1.273, 0.6842, -1.849, -0.6084, 2.42), X = ui10,
+             cost = 0.02485)
+  expect_api(method = "tsne", Y = c(-4.617, 2.008, -0.8907, 4.044, 0.6146, -3.478, 0.4454,
+                                    1.967, 4.82, -4.913, 0.5231, 0.837, 0.02866, -1.871, -1.438,
+                                    1.273, 0.6842, -1.849, -0.6084, 2.42), X = ui10, use_cpp = TRUE,
+             cost = 0.02485)
+
 })
 
 test_that("repeated runs", {

From 4705f6893f4814469c646049cc292897ae7f6930 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sun, 28 Jul 2024 18:39:09 -0700
Subject: [PATCH 06/26] rename source file

---
 smallvis/src/{matrix.cpp => gradients.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename smallvis/src/{matrix.cpp => gradients.cpp} (100%)

diff --git a/smallvis/src/matrix.cpp b/smallvis/src/gradients.cpp
similarity index 100%
rename from smallvis/src/matrix.cpp
rename to smallvis/src/gradients.cpp

From f5c1bd98368d7117f22fe9a617249bbe7b3a12e3 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sun, 28 Jul 2024 18:40:34 -0700
Subject: [PATCH 07/26] perplexity calibration in multi-threaded C++

---
 smallvis/R/RcppExports.R     |   4 +
 smallvis/R/smallvis.R        |  11 +--
 smallvis/R/sne.R             |  10 ++-
 smallvis/man/smallvis.Rd     |  11 +--
 smallvis/src/RcppExports.cpp |  16 ++++
 smallvis/src/perplexity.cpp  | 158 +++++++++++++++++++++++++++++++++++
 6 files changed, 197 insertions(+), 13 deletions(-)
 create mode 100644 smallvis/src/perplexity.cpp

diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R
index 05e26d5..44391e2 100644
--- a/smallvis/R/RcppExports.R
+++ b/smallvis/R/RcppExports.R
@@ -21,3 +21,7 @@ tsne_grad_cpp <- function(P, W, Z, Y, n_threads) {
   .Call(`_smallvis_tsne_grad_cpp`, P, W, Z, Y, n_threads)
 }
 
+find_beta_cpp <- function(X, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) {
+  .Call(`_smallvis_find_beta_cpp`, X, perplexity, tol, max_tries, n_threads)
+}
+
diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index e34476a..b1f2dc3 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -525,11 +525,12 @@
 #' \code{Value} section for details.
 #' @param n_threads Number of threads to use in multi-threaded code. Default is
 #' 0, which means no multi-threading. Mainly affects the calculation of things
-#' like distance matrices if you set \code{use_cpp = TRUE}. Otherwise, only
-#' methods that need to calculate nearest neighbors will be affected.
-#' @param use_cpp If \code{TRUE} use multi-threaded C++ code to calculate some
-#' matrices. Default is \code{FALSE}. This won't speed up all steps and you
-#' will want to use this in conjunction with \code{n_threads}.
+#' like distance matrices and perplexity calibration if you set
+#' \code{use_cpp = TRUE}. Otherwise, only methods that need to calculate
+#' nearest neighbors will be affected.
+#' @param use_cpp If \code{TRUE} use multi-threaded C++ code for some
+#' calculations. Default is \code{FALSE}. This won't speed up all steps and
+#' you will want to use this in conjunction with \code{n_threads}.
 #' @param eps Set epsilon for avoiding division-by-zero errors. Default is
 #' \code{.Machine$double.eps}, but if you see inconsistent convergence results
 #' with optimizer that should be reducing the cost each iteration, then try
diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R
index 02647fc..e58c611 100644
--- a/smallvis/R/sne.R
+++ b/smallvis/R/sne.R
@@ -1056,9 +1056,13 @@ sne_init <- function(cost,
     }
     tsmessage("Commencing calibration for perplexity = ",
               format_perps(perplexity))
-    x2ares <- x2aff(X, perplexity, tol = 1e-5, kernel = kernel,
-                    verbose = verbose)
-    P <- x2ares$W
+    if (use_cpp) {
+      P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W
+    }
+    else {
+      x2ares <- x2aff(X, perplexity, tol = 1e-5, kernel = kernel, verbose = verbose)
+      P <- x2ares$W
+    }
   }
 
   P <- scale_affinities(P,
diff --git a/smallvis/man/smallvis.Rd b/smallvis/man/smallvis.Rd
index cc4f19b..284de3c 100644
--- a/smallvis/man/smallvis.Rd
+++ b/smallvis/man/smallvis.Rd
@@ -197,12 +197,13 @@ to those values which are returned when this value is \code{TRUE}. See the
 \item{n_threads}{Number of threads to use in multi-threaded code. Default is
 0, which means no multi-threading. Mainly affects the calculation of things
-like distance matrices if you set \code{use_cpp = TRUE}. Otherwise, only
-methods that need to calculate nearest neighbors will be affected.}
+like distance matrices and perplexity calibration if you set
+\code{use_cpp = TRUE}. Otherwise, only methods that need to calculate
+nearest neighbors will be affected.}
 
-\item{use_cpp}{If \code{TRUE} use multi-threaded C++ code to calculate some
-matrices. Default is \code{FALSE}. This won't speed up all steps and you
-will want to use this in conjunction with \code{n_threads}.}
+\item{use_cpp}{If \code{TRUE} use multi-threaded C++ code for some
+calculations. Default is \code{FALSE}. This won't speed up all steps and
+you will want to use this in conjunction with \code{n_threads}.}
 
 \item{eps}{Set epsilon for avoiding division-by-zero errors. Default is
 \code{.Machine$double.eps}, but if you see inconsistent convergence results
diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp
index 5ac503a..372b9ad 100644
--- a/smallvis/src/RcppExports.cpp
+++ b/smallvis/src/RcppExports.cpp
@@ -73,6 +73,21 @@ BEGIN_RCPP
   return rcpp_result_gen;
 END_RCPP
 }
+// find_beta_cpp
+List find_beta_cpp(const NumericMatrix& X, double perplexity, double tol, int max_tries, std::size_t n_threads);
+RcppExport SEXP _smallvis_find_beta_cpp(SEXP XSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP n_threadsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const NumericMatrix& >::type X(XSEXP);
+    Rcpp::traits::input_parameter< double >::type perplexity(perplexitySEXP);
+    Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
+    Rcpp::traits::input_parameter< int >::type max_tries(max_triesSEXP);
+    Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
+    rcpp_result_gen = Rcpp::wrap(find_beta_cpp(X, perplexity, tol, max_tries, n_threads));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
@@ -80,6 +95,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2},
     {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2},
     {"_smallvis_tsne_grad_cpp", (DL_FUNC) &_smallvis_tsne_grad_cpp, 5},
+    {"_smallvis_find_beta_cpp", (DL_FUNC) &_smallvis_find_beta_cpp, 5},
     {NULL, NULL, 0}
 };
 
diff --git a/smallvis/src/perplexity.cpp b/smallvis/src/perplexity.cpp
new file mode 100644
index 0000000..b261897
--- /dev/null
+++ b/smallvis/src/perplexity.cpp
@@ -0,0 +1,158 @@
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+void find_beta(const std::vector<double> &data, std::size_t n, std::size_t d,
+               double perplexity, double logU, double tol, int max_tries,
+               std::vector<double> &W, std::vector<double> &beta, int &bad_perp,
+               std::size_t start_row, std::size_t end_row) {
+
+  for (std::size_t i = start_row; i < end_row; ++i) {
+    const std::size_t i_d = i * d;
+    const std::size_t idx = i * n;
+
+    std::vector<double> D2i(n);
+    double sum_d2i = 0.0;
+
+    // D2
+    for (std::size_t j = 0; j < n; ++j) {
+      const std::size_t j_d = j * d;
+      double dist = 0.0;
+      for (std::size_t k = 0; k < d; ++k) {
+        double diff = data[i_d + k] - data[j_d + k];
+        dist += diff * diff;
+      }
+      sum_d2i += dist;
+      D2i[j] = dist;
+    }
+
+    double betamin = -std::numeric_limits<double>::infinity();
+    double betamax = std::numeric_limits<double>::infinity();
+
+    // Initial guess for beta: 0.5 * perplexity / mean(D2i)
+    beta[i] = (0.5 * perplexity * n) / sum_d2i;
+
+    double Z = 0.0;
+    double entropy = 0.0;
+    for (std::size_t j = 0; j < n; ++j) {
+      if (i == j) {
+        W[idx + j] = 0.0;
+        continue;
+      }
+      W[idx + j] = exp(-D2i[j] * beta[i]);
+      entropy += D2i[j] * W[idx + j];
+      Z += W[idx + j];
+    }
+    if (Z == 0.0) {
+      entropy = 0.0;
+    } else {
+      entropy = (entropy / Z) * beta[i] + log(Z);
+    }
+
+    double Hdiff = entropy - logU;
+
+    int tries = 0;
+    while (fabs(Hdiff) > tol && tries < max_tries) {
+      if (Hdiff > 0) {
+        betamin = beta[i];
+        if (std::isinf(betamax)) {
+          beta[i] *= 2;
+        } else {
+          beta[i] = (beta[i] + betamax) / 2;
+        }
+      } else {
+        betamax = beta[i];
+        if (std::isinf(betamin)) {
+          beta[i] /= 2;
+        } else {
+          beta[i] = (beta[i] + betamin) / 2;
+        }
+      }
+
+      Z = 0.0;
+      entropy = 0.0;
+      for (std::size_t j = 0; j < n; ++j) {
+        if (i == j) {
+          W[idx + j] = 0.0;
+          continue;
+        }
+        W[idx + j] = exp(-D2i[j] * beta[i]);
+        entropy += D2i[j] * W[idx + j];
+        Z += W[idx + j];
+      }
+      if (Z == 0.0) {
+        entropy = 0.0;
+      } else {
+        entropy = (entropy / Z) * beta[i] + log(Z);
+      }
+
+      Hdiff = entropy - logU;
+      tries++;
+    }
+
+    if (fabs(Hdiff) > tol) {
+      bad_perp++;
+      std::fill(W.begin() + idx, W.begin() + idx + n, 0.0);
+      std::vector<int> knn_idx(n);
+      std::iota(knn_idx.begin(), knn_idx.end(), 0);
+      std::partial_sort(
+          knn_idx.begin(),
+          knn_idx.begin() + std::max(static_cast<int>(floor(perplexity)), 1),
+          knn_idx.end(), [&D2i](int a, int b) { return D2i[a] < D2i[b]; });
+      for (int k = 0; k < std::max(static_cast<int>(floor(perplexity)), 1);
+           ++k) {
+        W[idx + knn_idx[k]] = 1.0;
+      }
+    }
+  }
+}
+
+// [[Rcpp::export]]
+List find_beta_cpp(const NumericMatrix &X, double perplexity = 15,
+                   double tol = 1e-5, int max_tries = 50,
+                   std::size_t n_threads = 1) {
+
+  std::size_t n = X.nrow();
+  std::size_t d = X.ncol();
+  double logU = log(perplexity);
+
+  std::vector<double> X_vec(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      X_vec[i * d + j] = X(i, j);
+    }
+  }
+
+  std::vector<double> W(n * n, 0.0);
+  std::vector<double> beta(n, 0.0);
+  int bad_perp = 0;
+  if (n_threads > 1) {
+    std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+    std::vector<std::thread> threads;
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start_row = t * chunk_size;
+      std::size_t end_row = std::min(start_row + chunk_size, n);
+      threads.emplace_back(find_beta, std::cref(X_vec), n, d, perplexity, logU,
+                           tol, max_tries, std::ref(W), std::ref(beta),
+                           std::ref(bad_perp), start_row, end_row);
+    }
+    for (auto &thread : threads) {
+      thread.join();
+    }
+  } else {
+    find_beta(X_vec, n, d, perplexity, logU, tol, max_tries, W, beta, bad_perp,
+              0, n);
+  }
+
+  NumericMatrix W_mat(n, n);
+  std::copy(W.begin(), W.end(), W_mat.begin());
+  return List::create(Named("W") = transpose(W_mat), Named("beta") = beta,
+                      Named("bad_perp") = bad_perp);
+}

From a83817a92efc22bcd9b231486ba62b5653d63f09 Mon Sep 17 00:00:00 2001
From: James Melville <jlmelville@gmail.com>
Date: Sun, 28 Jul 2024 18:46:39 -0700
Subject: [PATCH 08/26] Fix 1/k knn if perplexity calibration fails

---
 smallvis/R/smallvis.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index b1f2dc3..05dad45 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -2211,13 +2211,15 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss",
       tries <- tries + 1
     }
 
     if (abs(Hdiff) > tol) {
+      # Put a weight of 1/perplexity on the perplexity-nearest neighbors
      bad_perp <- bad_perp + 1
      knn_idx <- order(Di, decreasing = FALSE)[1:max(floor(perplexity), 1)]
      knn_idx[knn_idx >= i] <- knn_idx[knn_idx >= i] + 1
-      Wi <- rep(0, length(Di))
-      Wi[knn_idx] <- 1
+      Wi <- rep(0, n)
+      Wi[knn_idx] <- 1 / floor(perplexity)
      intd[i] <- 0
+      W[i, ] <- Wi
     }
     else {
       # if we didn't supply estimates for beta manually, then initialize guess for
@@ -2227,8 +2229,9 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss",
         beta[i + 1] <- beta[i]
       }
       intd[i] <- intd_x2aff(Di, beta[i], Wi, sumWi, H)
+      W[i, -i] <- Wi
} - W[i, -i] <- Wi + } sigma <- sqrt(1 / beta) From 491d3722f2806e09c9c527861d9d37fda6d94512 Mon Sep 17 00:00:00 2001 From: James Melville Date: Wed, 31 Jul 2024 22:17:16 -0700 Subject: [PATCH 09/26] add multi-threaded MMDS gradient --- smallvis/R/RcppExports.R | 4 ++ smallvis/R/cost.R | 7 +++- smallvis/src/RcppExports.cpp | 16 ++++++++ smallvis/src/gradients.cpp | 72 ++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R index 44391e2..3728f9e 100644 --- a/smallvis/R/RcppExports.R +++ b/smallvis/R/RcppExports.R @@ -21,6 +21,10 @@ tsne_grad_cpp <- function(P, W, Z, Y, n_threads) { .Call(`_smallvis_tsne_grad_cpp`, P, W, Z, Y, n_threads) } +mmds_grad_cpp <- function(R, D, Y, eps, n_threads) { + .Call(`_smallvis_mmds_grad_cpp`, R, D, Y, eps, n_threads) +} + find_beta_cpp <- function(X, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) { .Call(`_smallvis_find_beta_cpp`, X, perplexity, tol, max_tries, n_threads) } diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index ed58500..db81356 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -1114,7 +1114,12 @@ mmds <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { }, gr = function(cost, Y) { cost <- cost_update(cost, Y) - cost$G <- k2g(Y, -4 * (cost$R - cost$D) / (cost$D + cost$eps)) + if (use_cpp) { + cost$G <- mmds_grad_cpp(cost$R, cost$D, Y, eps = eps, n_threads = n_threads) + } + else { + cost$G <- k2g(Y, -4 * (cost$R - cost$D) / (cost$D + cost$eps)) + } cost }, update = function(cost, Y) { diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp index 372b9ad..93960dc 100644 --- a/smallvis/src/RcppExports.cpp +++ b/smallvis/src/RcppExports.cpp @@ -73,6 +73,21 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// mmds_grad_cpp +NumericMatrix mmds_grad_cpp(const NumericMatrix& R, const NumericMatrix& D, const NumericMatrix& Y, double eps, std::size_t n_threads); +RcppExport SEXP _smallvis_mmds_grad_cpp(SEXP RSEXP, SEXP DSEXP, SEXP YSEXP, SEXP epsSEXP, SEXP n_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type R(RSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type D(DSEXP); + Rcpp::traits::input_parameter< const NumericMatrix& >::type Y(YSEXP); + Rcpp::traits::input_parameter< double >::type eps(epsSEXP); + Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(mmds_grad_cpp(R, D, Y, eps, n_threads)); + return rcpp_result_gen; +END_RCPP +} // find_beta_cpp List find_beta_cpp(const NumericMatrix& X, double perplexity, double tol, int max_tries, std::size_t n_threads); RcppExport SEXP _smallvis_find_beta_cpp(SEXP XSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP n_threadsSEXP) { @@ -95,6 +110,7 @@ static const R_CallMethodDef CallEntries[] = { {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2}, {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2}, {"_smallvis_tsne_grad_cpp", (DL_FUNC) &_smallvis_tsne_grad_cpp, 5}, + {"_smallvis_mmds_grad_cpp", (DL_FUNC) &_smallvis_mmds_grad_cpp, 5}, {"_smallvis_find_beta_cpp", (DL_FUNC) &_smallvis_find_beta_cpp, 5}, {NULL, NULL, 0} }; diff --git a/smallvis/src/gradients.cpp b/smallvis/src/gradients.cpp index 5febb97..055202f 100644 --- a/smallvis/src/gradients.cpp +++ b/smallvis/src/gradients.cpp @@ -6,6 +6,28 @@ using namespace Rcpp; +void 
mmds_grad(const std::vector<double> &R, const std::vector<double> &D,
+               const std::vector<double> &Y, double eps,
+               std::vector<double> &gradient, std::size_t start,
+               std::size_t end, std::size_t n, std::size_t d) {
+
+  for (std::size_t i = start; i < end; ++i) {
+    const std::size_t i_d = i * d;
+    const std::size_t i_n = i * n;
+    for (std::size_t j = 0; j < n; ++j) {
+      if (i == j) {
+        continue;
+      }
+      const std::size_t ij = i_n + j;
+      double k_ij = 4.0 * (D[ij] - R[ij]) / (D[ij] + eps);
+      for (std::size_t k = 0; k < d; ++k) {
+        gradient[i_d + k] += k_ij * (Y[i_d + k] - Y[j * d + k]);
+      }
+    }
+  }
+}
+
+
 void tsne_grad(const std::vector<double> &P, const std::vector<double> &W,
                double Z, const std::vector<double> &Y,
                std::vector<double> &gradient, std::size_t start,
@@ -281,3 +303,53 @@ NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
 
   return gradient;
 }
+// [[Rcpp::export]]
+NumericMatrix mmds_grad_cpp(const NumericMatrix &R,
+                            const NumericMatrix &D,
+                            const NumericMatrix &Y,
+                            double eps,
+                            std::size_t n_threads) {
+  std::size_t n = Y.nrow();
+  std::size_t d = Y.ncol();
+
+  std::vector<double> R_vec(R.begin(), R.end());
+  std::vector<double> D_vec(D.begin(), D.end());
+
+  std::vector<double> Y_vec(n * d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      Y_vec[i * d + j] = Y(i, j);
+    }
+  }
+
+  std::vector<double> gradient_vec(n * d, 0.0);
+
+  if (n_threads > 1) {
+    std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+    std::vector<std::thread> threads;
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start_row = t * chunk_size;
+      std::size_t end_row = std::min(start_row + chunk_size, n);
+      threads.emplace_back(mmds_grad, std::cref(R_vec), std::cref(D_vec),
+                           std::cref(Y_vec), eps, std::ref(gradient_vec), start_row,
+                           end_row, n, d);
+    }
+    for (auto &thread : threads) {
+      thread.join();
+    }
+  } else {
+    mmds_grad(R_vec, D_vec, Y_vec, eps, gradient_vec, 0, n, n, d);
+  }
+
+  NumericMatrix gradient(n, d);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < d; ++j) {
+      gradient(i, j) = gradient_vec[i * d + j];
+    }
+  }
+
+  return gradient;
+}
+
+

From 98e0c6f7102ca6de20e02e56b2a02e528673d63e Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sat, 3 Aug 2024 11:15:50 -0700
Subject: [PATCH 10/26] move perplexity

---
 smallvis/R/perplexity.R | 434 ++++++++++++++++++++++++++++++++++++++++
 smallvis/R/smallvis.R   | 433 ---------------------------------------
 2 files changed, 434 insertions(+), 433 deletions(-)
 create mode 100644 smallvis/R/perplexity.R

diff --git a/smallvis/R/perplexity.R b/smallvis/R/perplexity.R
new file mode 100644
index 0000000..9dfe5d0
--- /dev/null
+++ b/smallvis/R/perplexity.R
@@ -0,0 +1,434 @@
+# Perplexity Calibration --------------------------------------------------
+
+# Calculates the input affinities from X, such that each normalized row of the
+# affinity matrix has the specified perplexity (within the supplied tolerance).
+# Returns a list containing the affinities, beta values and intrinsic
+# dimensionalities.
+# NB set default kernel to "exp" to get results closer to the tsne package.
+# This differs from the procedure in the t-SNE paper by exponentially weighting
+# the distances, rather than the squared distances.
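+#
+# A minimal usage sketch (not part of the package API; iris is just a
+# hypothetical stand-in for real input data):
+#   X <- as.matrix(iris[, 1:4])
+#   res <- x2aff(X, perplexity = 30)
+#   dim(res$W)               # n x n affinity matrix with a zero diagonal
+#   sigma <- sqrt(1 / res$beta)  # per-point Gaussian bandwidths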
+x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", + verbose = FALSE, guesses = NULL) { + x_is_dist <- methods::is(X, "dist") + if (x_is_dist) { + D <- X + n <- attr(D, "Size") + + D <- as.matrix(D) + if (kernel == "gauss") { + D <- D * D + } + } + else { + XX <- rowSums(X * X) + n <- nrow(X) + } + + nperps <- length(perplexity) + if (nperps > 1 && nperps != n) { + stop("Must provide one perplexity per point") + } + + if (!is.null(guesses) && length(guesses) != n) { + stop("Initial guess vector must match number of observations in X") + } + + W <- matrix(0, n, n) + intd <- rep(0, n) + if (!is.null(guesses)) { + beta <- guesses + } + else { + beta <- rep(1, n) + } + if (nperps == 1) { + logU <- log(perplexity) + } + else { + perps <- perplexity + } + bad_perp <- 0 + + for (i in 1:n) { + if (nperps > 1) { + perplexity <- perps[i] + logU <- log(perplexity) + } + betamin <- -Inf + betamax <- Inf + + if (x_is_dist) { + Di <- D[i, -i] + } + else { + Di <- (XX[i] + XX - 2 * colSums(tcrossprod(X[i, ], X)))[-i] + Di[Di < 0] <- 0 + if (kernel == "exp") { + Di <- sqrt(Di) + } + } + + # If we haven't been provided with guesses, then try the initialization used + # for all points in ELKI according to Schubert & Gertz in "Intrinsic + # t-Stochastic Neighbor Embedding for Visualization and Outlier Detection: A + # Remedy Against the Curse of Dimensionality?" Using the last optimized beta + # seems to be better most of the time based on my testing though, so we'll + # only use it for the first point. + if (is.null(guesses) && i == 1) { + beta[1] <- 0.5 * perplexity / mean(Di) + } + + sres <- shannon(Di, beta[i]) + H <- sres$H + Wi <- sres$W + sumWi <- sres$Z + + Hdiff <- H - logU + tries <- 0 + while (abs(Hdiff) > tol && tries < 50) { + if (Hdiff > 0) { + betamin <- beta[i] + if (is.infinite(betamax)) { + beta[i] <- beta[i] * 2 + } else { + beta[i] <- (beta[i] + betamax) / 2 + } + } else { + betamax <- beta[i] + if (is.infinite(betamin)) { + beta[i] <- beta[i] / 2 + } else { + beta[i] <- (beta[i] + betamin) / 2 + } + } + + sres <- shannon(Di, beta[i]) + H <- sres$H + Wi <- sres$W + sumWi <- sres$Z + + Hdiff <- H - logU + tries <- tries + 1 + } + if (abs(Hdiff) > tol) { + # Put a weight of 1/perplexity on the perplexity-nearest neighbors + bad_perp <- bad_perp + 1 + knn_idx <- order(Di, decreasing = FALSE)[1:max(floor(perplexity), 1)] + knn_idx[knn_idx >= i] <- knn_idx[knn_idx >= i] + 1 + Wi <- rep(0, n) + Wi[knn_idx] <- 1 / floor(perplexity) + + intd[i] <- 0 + W[i, ] <- Wi + } + else { + # if we didn't supply estimates for beta manually, then initialize guess for + # next point with optimized beta for this point: doesn't save many + # iterations, but why not? 
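+      # (the optimal beta tends to vary smoothly from point to point, so the
+      # previous solution is usually a good starting guess)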
+      if (is.null(guesses) && i < n) {
+        beta[i + 1] <- beta[i]
+      }
+      intd[i] <- intd_x2aff(Di, beta[i], Wi, sumWi, H)
+      W[i, -i] <- Wi
+    }
+
+  }
+  sigma <- sqrt(1 / beta)
+
+  if (bad_perp > 0) {
+    tsmessage("Warning: ", bad_perp, " perplexity calibrations failed!")
+    warning(bad_perp, " perplexity calibrations failed")
+  }
+
+  if (verbose) {
+    summarize(sigma, "sigma summary", verbose = verbose)
+    summarize(intd, "Dint", verbose = verbose)
+  }
+  list(W = W, beta = beta, dint = intd)
+}
+
+# Calculates affinities based on exponential weighting of D2 with beta
+# and returns a list containing:
+# W, the affinities; Z, the sum of the affinities; H, the Shannon entropy
+# This routine relies specifically on the input weights being exp(-beta * D2)
+# and calculates the Shannon entropy as log(Z) + beta * sum(W * D2) / Z
+# where Z is the sum of W.
+shannon <- function(D2, beta) {
+  W <- exp(-D2 * beta)
+  Z <- sum(W)
+
+  if (Z == 0) {
+    H <- 0
+  }
+  else {
+    H <- log(Z) + beta * sum(D2 * W) / Z
+  }
+  list(
+    W = W,
+    Z = Z,
+    H = H
+  )
+}
+
+x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE, use_cpp = FALSE,
+                        n_threads = 1) {
+  x_is_dist <- methods::is(X, "dist")
+  if (x_is_dist) {
+    D <- X
+
+    D <- as.matrix(D)
+    D <- D * D
+  }
+  else {
+    D <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads)
+  }
+  beta <- 1 / (sigma * sigma)
+  sres <- shannon(D, beta)
+  W <- sres$W
+  diag(W) <- 0
+
+  list(W = W, beta = beta)
+}
+
+# Create a symmetrized distance matrix based on the k-nearest neighbors
+# Non-neighbor distances are set to Inf
+knn_dist <- function(X, k, n_threads, verbose) {
+  if (methods::is(X, "dist")) {
+    # If it's already a distance matrix, find k-smallest distance per column
+    # (ignoring self-distances of zero) and set everything larger to Inf
+    # (potentially more than k finite distances in the event of ties, not
+    # going to worry about that)
+    D <- as.matrix(X)
+    n <- nrow(D)
+    if (k > n - 1) {
+      stop("k must not be > n - 1")
+    }
+    kdists <- Rfast::colnth(D, rep(k + 1, n))
+    for (i in 1:n) {
+      Di <- D[, i]
+      Di[Di > kdists[i]] <- Inf
+      D[, i] <- Di
+    }
+  }
+  else {
+    # Find the k-nearest indexes and distances of X, and set the corresponding
+    # distance matrix elements
+    n <- nrow(X)
+    if (k > n - 1) {
+      stop("k must not be > n - 1")
+    }
+    tsmessage("Finding ", k + 1, " nearest neighbors")
+    knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads)
+    knn$idx <- knn$idx[, 2:(k + 1)]
+    knn$dist <- knn$dist[, 2:(k + 1)]
+
+    D <- matrix(Inf, nrow = n, ncol = n)
+    diag(D) <- 0
+    for (i in 1:n) {
+      D[i, knn$idx[i, ]] <- knn$dist[i, ]
+    }
+  }
+
+  # symmetrize
+  pmin(D, t(D))
+}
+
+# Create the knn graph: D[i, j] = 1 if j is one of i's k-nearest neighbors.
+# i is NOT considered a neighbor of itself.
+# No symmetrization is carried out.
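+# e.g. for three points on a line at 0, 1 and 3 with k = 1 (a sketch with a
+# hypothetical toy input):
+#   A <- knn_graph(dist(c(0, 1, 3)), k = 1, n_threads = 0, verbose = FALSE)
+#   # A[1, 2] == 1, A[2, 1] == 1 and A[3, 2] == 1, but A[2, 3] == 0:
+#   # point 3's nearest neighbor is point 2, not vice versa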
+# Used by knnmmds and knn kernel for SNE
+knn_graph <- function(X, k, n_threads, verbose) {
+  if (methods::is(X, "dist")) {
+    D <- as.matrix(X)
+    n <- nrow(D)
+    if (k > n - 1) {
+      stop("k must not be > n - 1")
+    }
+    kdists <- Rfast::colnth(D, rep(k + 1, n))
+    A <- matrix(0, nrow = n, ncol = n)
+    for (i in 1:n) {
+      Di <- D[i, ]
+      # mark i's k nearest neighbors: non-zero distances no larger than the
+      # (k + 1)-th smallest (which includes the self-distance of zero)
+      A[i, Di > 0 & Di <= kdists[i]] <- 1
+    }
+    D <- A
+  }
+  else {
+    # Find the k-nearest indexes and distances of X, and set the corresponding
+    # distance matrix elements
+    n <- nrow(X)
+    if (k > n - 1) {
+      stop("k must not be > n - 1")
+    }
+
+    tsmessage("Finding ", k + 1, " nearest neighbors")
+    knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads)
+    knn$idx <- knn$idx[, 2:(k + 1)]
+
+    D <- matrix(0, nrow = n, ncol = n)
+    for (i in 1:n) {
+      D[i, knn$idx[i, ]] <- 1
+    }
+  }
+  D
+}
+
+# Given data X and k nearest neighbors, return a geodesic distance matrix
+# Disconnections are treated by using the Euclidean distance.
+geodesic <- function(X, k, fill = TRUE, use_cpp = FALSE, n_threads = 0,
+                     verbose = FALSE) {
+  tsmessage("Calculating geodesic distances with k = ", k)
+
+  R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose)
+  # The hard work is done by Rfast's implementation of Floyd's algorithm
+  G <- Rfast::floyd(R)
+  if (any(is.infinite(G)) && fill) {
+    tsmessage("k = ", k, " resulted in disconnections: filling with Euclidean distances")
+    if (methods::is(X, "dist")) {
+      R <- as.matrix(X)
+    }
+    else {
+      R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads)
+    }
+    G[is.infinite(G)] <- R[is.infinite(G)]
+  }
+  G
+}
+
+# Multiscale perplexities: P is an average over the results of multiple
+# perplexities
+# as described by de Bodt et al in
+# Perplexity-free t-SNE and twice Student tt-SNE (2018)
+msp <- function(X, perplexities = NULL, tol = 1e-5,
+                symmetrize = "symmetric", row_normalize = TRUE,
+                normalize = TRUE,
+                verbose = FALSE, guesses = NULL) {
+  if (methods::is(X, "dist")) {
+    n <- attr(X, "Size")
+  }
+  else {
+    n <- nrow(X)
+  }
+
+  if (is.null(perplexities)) {
+    perplexities <- idp_perps(n)
+  }
+  tsmessage("Calculating multi-scale P with perplexities from ",
+            formatC(perplexities[1]), " to ", formatC(last(perplexities)))
+
+  res <- NULL
+  for (perplexity in perplexities) {
+    tsmessage("Commencing calibration for perplexity = ",
+              format_perps(perplexity))
+    x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss",
+                     verbose = verbose, guesses = guesses)
+    P <- x2a_res$W
+    Q <- scale_affinities(P,
+                          symmetrize = "symmetric", row_normalize = TRUE,
+                          normalize = TRUE)
+    if (is.null(res)) {
+      res$P <- Q
+    }
+    else {
+      res$P <- res$P + Q
+    }
+  }
+
+  if (length(perplexities) > 1) {
+    res$P <- res$P / length(perplexities)
+  }
+
+  if (is.logical(row_normalize)) {
+    tsmessage("Effective perplexity of multiscale P approx = ",
+              formatC(stats::median(perpp(res$P))))
+  }
+
+  res
+}
+
+# Use the Intrinsic Dimensionality Perplexity (IDP)
+# Scan through the provided perplexities and use the result which maximizes
+# the mean correlation dimension (which is an estimate for the intrinsic
+# dimensionality). Stops at the first maximum found.
+idp <- function(X, perplexities = NULL, tol = 1e-5, + verbose = FALSE, guesses = NULL) { + if (methods::is(X, "dist")) { + n <- attr(X, "Size") + } + else { + n <- nrow(X) + } + + if (is.null(perplexities)) { + perplexities <- idp_perps(n) + } + if (verbose) { + tsmessage("Searching for intrinsic dimensionality with perplexities from ", + formatC(perplexities[1]), " to ", formatC(last(perplexities))) + } + + corr_dim_max <- -Inf + idp <- 0 + idp_res <- NULL + for (perplexity in perplexities) { + if (verbose) { + tsmessage("Commencing calibration for perplexity = ", + format_perps(perplexity)) + } + x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss", + verbose = verbose, guesses = guesses) + corr_dim <- mean(x2a_res$dint) + if (corr_dim <= corr_dim_max) { + break + } + else { + corr_dim_max <- corr_dim + idp <- perplexity + idp_res <- x2a_res + } + } + if (idp <= 0) { + stop("Unable to find an IDP: all correlation dimensions were -ve") + } + if (verbose) { + tsmessage("Found IDP at perplexity = ", formatC(idp), + " intrinsic dimensionality = ", formatC(corr_dim_max)) + } + + idp_res$idp <- idp + idp_res +} + +# Come up with a set of candidate perplexities for finding the Intrinsic +# Dimensionality Perplexity. Use powers of 2 up to around half the data set +# size, or a perplexity of 128, whichever is smaller. Tries to provide a balance +# of coverage of useful perplexities vs time consumption. +idp_perps <- function(n) { + max_u <- min(128, max(2, ceiling(n / 2))) + max_uexp <- floor(log2(max_u)) + min_uexp <- min(2, max_uexp) + 2 ^ (min_uexp:max_uexp) +} + +# Is the perplexity argument a string or a list with the first element is a +# string? +perp_method <- function(perplexity) { + method <- "" + if (is.character(perplexity) || is.list(perplexity)) { + if (is.list(perplexity)) { + method <- perplexity[[1]] + } + else { + method <- perplexity + } + } + tolower(method) +} + +# If the user provided a list like ("idp", c(10, 20, 30)), extract the list +# of numbers as the candidate list of perplexities. Otherwise, return NULL +user_idp_perps <- function(perplexity) { + perplexities <- NULL + if (is.list(perplexity) && length(perplexity) == 2) { + perplexities <- perplexity[[2]] + } + perplexities +} \ No newline at end of file diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 05dad45..1eb4e43 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -2098,440 +2098,7 @@ pca_whiten <- function(X, ncol = min(dim(X)), eps = 1e-5, verbose = FALSE) { } -# Perplexity Calibration -------------------------------------------------- - -# Calculates the input affinities from X, such that each normalized row of the -# affinity matrix has the specified perplexity (within the supplied tolerance). -# Returns a list containing the affinities, beta values and intrinsic -# dimensionalities. -# NB set default kernel to "exp" to get results closer to the tsne package. -# This differs from the procedure in the t-SNE paper by exponentially weighting -# the distances, rather than the squared distances. 
-x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", - verbose = FALSE, guesses = NULL) { - x_is_dist <- methods::is(X, "dist") - if (x_is_dist) { - D <- X - n <- attr(D, "Size") - - D <- as.matrix(D) - if (kernel == "gauss") { - D <- D * D - } - } - else { - XX <- rowSums(X * X) - n <- nrow(X) - } - - nperps <- length(perplexity) - if (nperps > 1 && nperps != n) { - stop("Must provide one perplexity per point") - } - - if (!is.null(guesses) && length(guesses) != n) { - stop("Initial guess vector must match number of observations in X") - } - - W <- matrix(0, n, n) - intd <- rep(0, n) - if (!is.null(guesses)) { - beta <- guesses - } - else { - beta <- rep(1, n) - } - if (nperps == 1) { - logU <- log(perplexity) - } - else { - perps <- perplexity - } - bad_perp <- 0 - - for (i in 1:n) { - if (nperps > 1) { - perplexity <- perps[i] - logU <- log(perplexity) - } - betamin <- -Inf - betamax <- Inf - - if (x_is_dist) { - Di <- D[i, -i] - } - else { - Di <- (XX[i] + XX - 2 * colSums(tcrossprod(X[i, ], X)))[-i] - Di[Di < 0] <- 0 - if (kernel == "exp") { - Di <- sqrt(Di) - } - } - - # If we haven't been provided with guesses, then try the initialization used - # for all points in ELKI according to Schubert & Gertz in "Intrinsic - # t-Stochastic Neighbor Embedding for Visualization and Outlier Detection: A - # Remedy Against the Curse of Dimensionality?" Using the last optimized beta - # seems to be better most of the time based on my testing though, so we'll - # only use it for the first point. - if (is.null(guesses) && i == 1) { - beta[1] <- 0.5 * perplexity / mean(Di) - } - - sres <- shannon(Di, beta[i]) - H <- sres$H - Wi <- sres$W - sumWi <- sres$Z - - Hdiff <- H - logU - tries <- 0 - while (abs(Hdiff) > tol && tries < 50) { - if (Hdiff > 0) { - betamin <- beta[i] - if (is.infinite(betamax)) { - beta[i] <- beta[i] * 2 - } else { - beta[i] <- (beta[i] + betamax) / 2 - } - } else { - betamax <- beta[i] - if (is.infinite(betamin)) { - beta[i] <- beta[i] / 2 - } else { - beta[i] <- (beta[i] + betamin) / 2 - } - } - - sres <- shannon(Di, beta[i]) - H <- sres$H - Wi <- sres$W - sumWi <- sres$Z - - Hdiff <- H - logU - tries <- tries + 1 - } - if (abs(Hdiff) > tol) { - # Put a weight of 1/perplexity on the perplexity-nearest neighbors - bad_perp <- bad_perp + 1 - knn_idx <- order(Di, decreasing = FALSE)[1:max(floor(perplexity), 1)] - knn_idx[knn_idx >= i] <- knn_idx[knn_idx >= i] + 1 - Wi <- rep(0, n) - Wi[knn_idx] <- 1 / floor(perplexity) - - intd[i] <- 0 - W[i, ] <- Wi - } - else { - # if we didn't supply estimates for beta manually, then initialize guess for - # next point with optimized beta for this point: doesn't save many - # iterations, but why not? - if (is.null(guesses) && i < n) { - beta[i + 1] <- beta[i] - } - intd[i] <- intd_x2aff(Di, beta[i], Wi, sumWi, H) - W[i, -i] <- Wi - } - - } - sigma <- sqrt(1 / beta) - if (bad_perp > 0) { - tsmessage("Warning: ", bad_perp, " perplexity calibrations failed!") - warning(bad_perp, " perplexity calibrations failed") - } - - if (verbose) { - summarize(sigma, "sigma summary", verbose = verbose) - summarize(intd, "Dint", verbose = verbose) - } - list(W = W, beta = beta, dint = intd) -} - -# Calculates affinites based on exponential weighting of D2 with beta -# and returns a list containing: -# W, the affinities; Z, the sum of the affinities; H, the Shannon entropy -# This routine relies specifically on input weights being = exp(-beta * D) -# and calculates the Shannon entropy as log(Z) + beta * sum(W * D) / Z -# where Z is the sum of W. 
-shannon <- function(D2, beta) { - W <- exp(-D2 * beta) - Z <- sum(W) - - if (Z == 0) { - H <- 0 - } - else { - H <- log(Z) + beta * sum(D2 * W) / Z - } - list( - W = W, - Z = Z, - H = H - ) -} - -x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE, use_cpp = FALSE, - n_threads = 1) { - x_is_dist <- methods::is(X, "dist") - if (x_is_dist) { - D <- X - - D <- as.matrix(D) - D <- D * D - } - else { - D <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) - } - beta <- 1 / (sigma * sigma) - sres <- shannon(D, beta) - W <- sres$W - diag(W) <- 0 - - list(W = W, beta = beta) -} - -# Create a symmetrized distance matrix based on the k-nearest neighbors -# Non-neighbor distances are set to Inf -knn_dist <- function(X, k, n_threads, verbose) { - if (methods::is(X, "dist")) { - # If it's already a distance matrix, find k-smallest distance per column - # (ignoring self-distances of zero) and set everything larger to Inf - # (potentially more than k finite distances in the event of ties, not - # going to worry about that) - D <- as.matrix(X) - n <- nrow(D) - if (k > n - 1) { - stop("k must be not be > n - 1") - } - kdists <- Rfast::colnth(D, rep(k + 1, n)) - for (i in 1:n) { - Di <- D[, i] - Di[Di > kdists[i]] <- Inf - D[, i] <- Di - } - } - else { - # Find the k-nearest indexes and distances of X, and set the corresponding - # distance matrix elements - n <- nrow(X) - if (k > n - 1) { - stop("k must be not be > n - 1") - } - tsmessage("Finding ", k + 1, " nearest neighbors") - knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) - knn$idx <- knn$idx[, 2:(k + 1)] - knn$dist <- knn$dist[, 2:(k + 1)] - - D <- matrix(Inf, nrow = n, ncol = n) - diag(D) <- 0 - for (i in 1:n) { - D[i, knn$idx[i, ]] <- knn$dist[i, ] - } - } - - # symmetrize - pmin(D, t(D)) -} - -# Create the knn graph: D[i, j] = 1 if j is one of i's k-nearest neighbors. -# i is NOT considered a neighbor of itself. -# No symmetrization is carried out. -# Used by knnmmds and knn kernel for SNE -knn_graph <- function(X, k, n_threads, verbose) { - if (methods::is(X, "dist")) { - D <- as.matrix(X) - n <- nrow(D) - if (k > n - 1) { - stop("k must be not be > n - 1") - } - kdists <- Rfast::colnth(D, rep(k + 1, n)) - for (i in 1:n) { - Di <- D[, i] - Di[Di > kdists[i]] <- 0 - D[, i] <- 1 - } - } - else { - # Find the k-nearest indexes and distances of X, and set the corresponding - # distance matrix elements - n <- nrow(X) - if (k > n - 1) { - stop("k must be not be > n - 1") - } - - tsmessage("Finding ", k + 1, " nearest neighbors") - knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) - knn$idx <- knn$idx[, 2:(k + 1)] - - D <- matrix(0, nrow = n, ncol = n) - for (i in 1:n) { - D[i, knn$idx[i, ]] <- 1 - } - } - D -} - -# Given data X and k nearest neighbors, return a geodisic distance matrix -# Disconnections are treated by using the Euclidean distance. 
-geodesic <- function(X, k, fill = TRUE, use_cpp = FALSE, n_threads = 0, - verbose = FALSE) { - tsmessage("Calculating geodesic distances with k = ", k) - - R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose) - # The hard work is done by Rfast's implementation of Floyd's algorithm - G <- Rfast::floyd(R) - if (any(is.infinite(G)) && fill) { - tsmessage("k = ", k, " resulted in disconnections: filling with Euclidean distances") - if (methods::is(X, "dist")) { - R <- as.matrix(X) - } - else { - R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) - } - G[is.infinite(G)] <- R[is.infinite(G)] - } - G -} - -# Multiscale perplexities: P is an average over the results of multiple -# perplexities -# as described by de Bodt et al in -# Perplexity-free t-SNE and twice Student tt-SNE (2018) -msp <- function(X, perplexities = NULL, tol = 1e-5, - symmetrize = "symmetric", row_normalize = TRUE, - normalize = TRUE, - verbose = FALSE, guesses = NULL) { - if (methods::is(X, "dist")) { - n <- attr(X, "Size") - } - else { - n <- nrow(X) - } - - if (is.null(perplexities)) { - perplexities <- idp_perps(n) - } - tsmessage("Calculating multi-scale P with perplexities from ", - formatC(perplexities[1]), " to ", formatC(last(perplexities))) - - res <- NULL - for (perplexity in perplexities) { - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) - x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss", - verbose = verbose, guesses = guesses) - P <- x2a_res$W - Q <- scale_affinities(P, - symmetrize = "symmetric", row_normalize = TRUE, - normalize = TRUE) - if (is.null(res)) { - res$P <- Q - } - else { - res$P <- res$P + Q - } - } - - if (length(perplexities) > 1) { - res$P <- res$P / length(perplexities) - } - - if (is.logical(row_normalize)) { - tsmessage("Effective perplexity of multiscale P approx = ", - formatC(stats::median(perpp(res$P)))) - } - - res -} - -# Use the Intrinsic Dimensionality Perplexity (IDP) -# Scan through the provided perplexities and use the result which maximizes -# the mean correlation dimension (which is an estimate for the intrinsic -# dimensionality). Stops at the first maxmimum found. -idp <- function(X, perplexities = NULL, tol = 1e-5, - verbose = FALSE, guesses = NULL) { - if (methods::is(X, "dist")) { - n <- attr(X, "Size") - } - else { - n <- nrow(X) - } - - if (is.null(perplexities)) { - perplexities <- idp_perps(n) - } - if (verbose) { - tsmessage("Searching for intrinsic dimensionality with perplexities from ", - formatC(perplexities[1]), " to ", formatC(last(perplexities))) - } - - corr_dim_max <- -Inf - idp <- 0 - idp_res <- NULL - for (perplexity in perplexities) { - if (verbose) { - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) - } - x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss", - verbose = verbose, guesses = guesses) - corr_dim <- mean(x2a_res$dint) - if (corr_dim <= corr_dim_max) { - break - } - else { - corr_dim_max <- corr_dim - idp <- perplexity - idp_res <- x2a_res - } - } - if (idp <= 0) { - stop("Unable to find an IDP: all correlation dimensions were -ve") - } - if (verbose) { - tsmessage("Found IDP at perplexity = ", formatC(idp), - " intrinsic dimensionality = ", formatC(corr_dim_max)) - } - - idp_res$idp <- idp - idp_res -} - -# Come up with a set of candidate perplexities for finding the Intrinsic -# Dimensionality Perplexity. Use powers of 2 up to around half the data set -# size, or a perplexity of 128, whichever is smaller. 
Tries to provide a balance -# of coverage of useful perplexities vs time consumption. -idp_perps <- function(n) { - max_u <- min(128, max(2, ceiling(n / 2))) - max_uexp <- floor(log2(max_u)) - min_uexp <- min(2, max_uexp) - 2 ^ (min_uexp:max_uexp) -} - -# Is the perplexity argument a string or a list with the first element is a -# string? -perp_method <- function(perplexity) { - method <- "" - if (is.character(perplexity) || is.list(perplexity)) { - if (is.list(perplexity)) { - method <- perplexity[[1]] - } - else { - method <- perplexity - } - } - tolower(method) -} - -# If the user provided a list like ("idp", c(10, 20, 30)), extract the list -# of numbers as the candidate list of perplexities. Otherwise, return NULL -user_idp_perps <- function(perplexity) { - perplexities <- NULL - if (is.list(perplexity) && length(perplexity) == 2) { - perplexities <- perplexity[[2]] - } - perplexities -} # Utility Functions ------------------------------------------------------- From 88b2a641ecaa4fcf05bde2007829ff70b93a3bb1 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:23:29 -0700 Subject: [PATCH 11/26] move utils --- smallvis/R/smallvis.R | 146 ------------------------------------------ smallvis/R/util.R | 142 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 146 deletions(-) create mode 100644 smallvis/R/util.R diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 1eb4e43..3c6c1c6 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -2097,152 +2097,6 @@ pca_whiten <- function(X, ncol = min(dim(X)), eps = 1e-5, verbose = FALSE) { sweep(pca$scores, 2, sqrt(pca$lambda + eps), "/") } - - - -# Utility Functions ------------------------------------------------------- - -# Create matrix of squared Euclidean distances -# For low dimension, X %*% t(X) seems to a bit faster than tcrossprod(X) -# Small -ve distances are possible -dist2 <- function(X) { - D2 <- rowSums(X * X) - D2 + sweep(X %*% t(X) * -2, 2, t(D2), `+`) -} - -calc_d2 <- function(X, use_cpp = FALSE, n_threads = 1) { - if (use_cpp) { - dist2_cpp(X, n_threads = n_threads) - } - else { - safe_dist2(X) - } -} - -calc_d <- function(X, use_cpp = FALSE, n_threads = 1) { - if (use_cpp) { - dist_cpp(X, n_threads = n_threads) - } - else { - sqrt(safe_dist2(X)) - } -} - -# Squared Euclidean distances, ensuring no small -ve distances can occur -safe_dist2 <- function(X) { - D2 <- dist2(X) - D2[D2 < 0] <- 0 - D2 -} - -calc_d2tweight <- function(D2, use_cpp = FALSE, n_threads = 1) { - if (use_cpp) { - d2_to_tweight_cpp(D2, n_threads = n_threads) - } - else { - 1 / (1 + D2) - } -} - -calc_tweight <- function(X, use_cpp = FALSE, n_threads = 1) { - if (use_cpp) { - tweight_cpp(X, n_threads = n_threads) - } - else { - D2 <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) - 1 / (1 + D2) - } -} - -# 2-norm of a vector or matrix -norm2 <- function(X) { - sqrt(sum(X * X)) -} - -# Simple time stamp -stime <- function() { - format(Sys.time(), "%T") -} - -# message with a time stamp -tsmessage <- function(..., domain = NULL, appendLF = TRUE, force = FALSE, - time_stamp = TRUE) { - verbose <- get0("verbose", envir = sys.parent()) - - if (force || (!is.null(verbose) && verbose)) { - msg <- "" - if (time_stamp) { - msg <- paste0(stime(), " ") - } - message(msg, ..., domain = domain, appendLF = appendLF) - utils::flush.console() - } -} - -# merge lists, where anything non-NULL in l is kept -# e.g. 
-# all(unlist(update_list(list(a = 1, b = 2), list(a = 10, c = 3))) == -# unlist(list(a = 1, b = 2, c = 3))) -lmerge <- function(l, l2) { - for (name in names(l2)) { - if (is.null(l[[name]])) { - l[[name]] <- l2[[name]] - } - } - l -} - -# replaces the contents of l with the named arguments -# e.g. -# all(lreplace(c(a = 1, b = 2), a = 10, c = 3) == -# c(a = 10, b = 2, c = 3)) -lreplace <- function(l, ...) { - varargs <- list(...) - for (i in names(varargs)) { - l[[i]] <- varargs[[i]] - } - l -} - -# relative tolerance between x and y -reltol <- function(x, y) { - abs(x - y) / min(abs(x), abs(y)) -} - -# Check if a value is non-null and true -nnat <- function(x) { - !is.null(x) && is.logical(x) && x -} - -# log vector information -summarize <- function(X, msg = "", verbose = FALSE) { - summary_X <- summary(X, digits = max(3, getOption("digits") - 3)) - tsmessage(msg, ": ", paste(names(summary_X), ":", summary_X, "|", - collapse = "")) -} - -# Format perplexity as a string. Could be a scalar or a vector. In the latter -# case, just list the first two values and then ellipses -format_perps <- function(perplexity) { - if (length(perplexity) > 1) { - paste0(formatC(perplexity[1]), ", ", - formatC(perplexity[2]), "...") - } - else { - formatC(perplexity) - } -} - -# last item of a vector -last <- function(x) { - x[length(x)] -} - -# remove NULL items from a list -remove_nulls <- function(l) { - l[!vapply(l, is.null, logical(1))] -} - # UMAP ------------------------------------------------------------------- # Fits a kernel for the output distances of the form w = 1 / (1 + a dsq ^ b) diff --git a/smallvis/R/util.R b/smallvis/R/util.R new file mode 100644 index 0000000..9d0efc6 --- /dev/null +++ b/smallvis/R/util.R @@ -0,0 +1,142 @@ +# Utility Functions ------------------------------------------------------- + +# Create matrix of squared Euclidean distances +# For low dimension, X %*% t(X) seems to a bit faster than tcrossprod(X) +# Small -ve distances are possible +dist2 <- function(X) { + D2 <- rowSums(X * X) + D2 + sweep(X %*% t(X) * -2, 2, t(D2), `+`) +} + +calc_d2 <- function(X, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + dist2_cpp(X, n_threads = n_threads) + } + else { + safe_dist2(X) + } +} + +calc_d <- function(X, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + dist_cpp(X, n_threads = n_threads) + } + else { + sqrt(safe_dist2(X)) + } +} + +# Squared Euclidean distances, ensuring no small -ve distances can occur +safe_dist2 <- function(X) { + D2 <- dist2(X) + D2[D2 < 0] <- 0 + D2 +} + +calc_d2tweight <- function(D2, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + d2_to_tweight_cpp(D2, n_threads = n_threads) + } + else { + 1 / (1 + D2) + } +} + +calc_tweight <- function(X, use_cpp = FALSE, n_threads = 1) { + if (use_cpp) { + tweight_cpp(X, n_threads = n_threads) + } + else { + D2 <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) + 1 / (1 + D2) + } +} + +# 2-norm of a vector or matrix +norm2 <- function(X) { + sqrt(sum(X * X)) +} + +# Simple time stamp +stime <- function() { + format(Sys.time(), "%T") +} + +# message with a time stamp +tsmessage <- function(..., domain = NULL, appendLF = TRUE, force = FALSE, + time_stamp = TRUE) { + verbose <- get0("verbose", envir = sys.parent()) + + if (force || (!is.null(verbose) && verbose)) { + msg <- "" + if (time_stamp) { + msg <- paste0(stime(), " ") + } + message(msg, ..., domain = domain, appendLF = appendLF) + utils::flush.console() + } +} + +# merge lists, where anything non-NULL in l is kept +# e.g. 
+# all(unlist(update_list(list(a = 1, b = 2), list(a = 10, c = 3))) == +# unlist(list(a = 1, b = 2, c = 3))) +lmerge <- function(l, l2) { + for (name in names(l2)) { + if (is.null(l[[name]])) { + l[[name]] <- l2[[name]] + } + } + l +} + +# replaces the contents of l with the named arguments +# e.g. +# all(lreplace(c(a = 1, b = 2), a = 10, c = 3) == +# c(a = 10, b = 2, c = 3)) +lreplace <- function(l, ...) { + varargs <- list(...) + for (i in names(varargs)) { + l[[i]] <- varargs[[i]] + } + l +} + +# relative tolerance between x and y +reltol <- function(x, y) { + abs(x - y) / min(abs(x), abs(y)) +} + +# Check if a value is non-null and true +nnat <- function(x) { + !is.null(x) && is.logical(x) && x +} + +# log vector information +summarize <- function(X, msg = "", verbose = FALSE) { + summary_X <- summary(X, digits = max(3, getOption("digits") - 3)) + tsmessage(msg, ": ", paste(names(summary_X), ":", summary_X, "|", + collapse = "")) +} + +# Format perplexity as a string. Could be a scalar or a vector. In the latter +# case, just list the first two values and then ellipses +format_perps <- function(perplexity) { + if (length(perplexity) > 1) { + paste0(formatC(perplexity[1]), ", ", + formatC(perplexity[2]), "...") + } + else { + formatC(perplexity) + } +} + +# last item of a vector +last <- function(x) { + x[length(x)] +} + +# remove NULL items from a list +remove_nulls <- function(l) { + l[!vapply(l, is.null, logical(1))] +} \ No newline at end of file From fca3880c587939cf7e2faf0f23c7bc60c37266b0 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:25:20 -0700 Subject: [PATCH 12/26] move init code --- smallvis/R/init.R | 159 ++++++++++++++++++++++++++++++++++++++++++ smallvis/R/smallvis.R | 156 ----------------------------------------- 2 files changed, 159 insertions(+), 156 deletions(-) create mode 100644 smallvis/R/init.R diff --git a/smallvis/R/init.R b/smallvis/R/init.R new file mode 100644 index 0000000..2c83c43 --- /dev/null +++ b/smallvis/R/init.R @@ -0,0 +1,159 @@ +# Output Initialization --------------------------------------------------- + +# Initialization of the output coordinates +init_out <- function(Y_init, X, n, ndim, pca_preprocessed, verbose = FALSE) { + switch(Y_init, + pca = { + tsmessage("Initializing from PCA scores") + pca_init(X, ndim, pca_preprocessed, verbose) + }, + rand = { + tsmessage("Initializing from random Gaussian") + matrix(stats::rnorm(ndim * n, sd = 1), n) + } + ) +} + +pca_init <- function(X, ndim, pca_preprocessed, verbose = FALSE) { + # If we've already done PCA, we can just take the first two columns + if (pca_preprocessed) { + X[, 1:2] + } + else { + pca_scores(X, ncol = ndim, verbose = verbose) + } +} + +# Laplacian Eigenmap (Belkin & Niyogi, 2002) +# Original formulation solves the generalized eigenvalue problem of the +# unnormalized graph Laplacian: L v = lambda D v, where L = D - A +# and uses the bottom eigenvectors v that result +# (ignoring the constant eigenvector associated with the smallest eigenvalue). +# +# This is equivalent to using the top eigenvectors from the usual +# eigendecomposition of a row-normalized Laplacian P = D^-1 A: P v = lambda' v +# so we don't need to depend on an external package for generalized eigenvalues. +# Note that while the eigenvectors are the same, the eigenvalues are +# different: lambda' = 1 - lambda, but we don't use them with Laplacian +# Eigenmaps anyway. +# +# As we only need to calculate the top ndim + 1 eigenvectors (i.e. 
normally 3) +# it's incredibly wasteful to calculate all of them. Therefore, if the +# RSpectra library is available, we use that instead, which allows for only the +# top eigenvectors to be extracted. Otherwise, use the slower eigen routine. +# A must be symmetric and positive semi definite, but not necessarily +# normalized in any specific way. +laplacian_eigenmap <- function(A, ndim = 2, use_RSpectra = TRUE) { + # Equivalent to: D <- diag(colSums(A)); M <- solve(D) %*% A + # This effectively row-normalizes A: colSums is normally faster than rowSums + # and because A is symmetric, they're equivalent + M <- A / colSums(A) + if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, + warn.conflicts = FALSE)) { + tsmessage("Using RSpectra for eigenvectors") + Re(RSpectra::eigs(M, k = ndim + 1)$vectors[, 2:(ndim + 1)]) + } + else { + tsmessage("Using eigen for eigenvectors") + eigen(M, symmetric = FALSE)$vectors[, 2:(ndim + 1)] + } +} + +# Use a normalized Laplacian. The UMAP approach, taken from version 0.2.1. +normalized_spectral_init <- function(A, ndim = 2, use_RSpectra = TRUE) { + n <- nrow(A) + # Normalized Laplacian: clear and close to UMAP code, but very slow in R + # I <- diag(1, nrow = n, ncol = n) + # D <- diag(1 / sqrt(colSums(A))) + # L <- I - D %*% A %*% D + + # A lot faster (order of magnitude when n = 1000) + Dsq <- sqrt(colSums(A)) + L <- -t(A / Dsq) / Dsq + diag(L) <- 1 + diag(L) + + if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, + warn.conflicts = FALSE)) { + tsmessage("Using RSpectra for eigenvectors") + k <- ndim + 1 + ncv <- max(2 * k + 1, floor(sqrt(n))) + opt <- list( + ncv = ncv, + maxitr = 5 * n, + tol = 1e-4 + ) + res <- RSpectra::eigs(L, k = k, which = "SM", opt = opt) + vec_indices <- rev(order(res$values, decreasing = TRUE)[1:ndim]) + res <- Re(res$vectors[, vec_indices]) + } + else { + tsmessage("Using eigen for eigenvectors") + res <- eigen(L, symmetric = FALSE) + vec_indices <- order(res$values, decreasing = FALSE)[2:(ndim + 1)] + res <- Re(res$vectors[, vec_indices]) + } + res +} + +is_spectral_init <- function(init) { + tolower(init) %in% c("laplacian", "normlaplacian") +} + +# Rescale embedding so that the standard deviation is the specified value. +# Default gives initialization like t-SNE, but not random. +shrink_coords <- function(X, sdev = 1e-4) { + scale(X, scale = apply(X, 2, stats::sd) / sdev) +} + +# PCA --------------------------------------------------------------------- + + +# Calculates a matrix containing the first ncol columns of the PCA scores. 
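+# (e.g. pca_scores(X, ncol = 2) gives the 2D scores that pca_init above
+# uses for the "pca" initialization).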
+# Returns the score matrix unless ret_extra is TRUE, in which case a list +# is returned also containing the eigenvalues +pca_scores <- function(X, ncol = min(dim(X)), verbose = FALSE, + ret_extra = FALSE) { + if (methods::is(X, "dist")) { + res_mds <- stats::cmdscale(X, x.ret = TRUE, eig = TRUE, k = ncol) + + if (ret_extra || verbose) { + lambda <- res_mds$eig + varex <- sum(lambda[1:ncol]) / sum(lambda) + tsmessage("PCA (using classical MDS): ", ncol, " components explained ", + formatC(varex * 100), "% variance") + } + scores <- res_mds$points + } + else { + X <- scale(X, center = TRUE, scale = FALSE) + # do SVD on X directly rather than forming covariance matrix + s <- svd(X, nu = ncol, nv = 0) + D <- diag(c(s$d[1:ncol])) + if (verbose || ret_extra) { + # calculate eigenvalues of covariance matrix from singular values + lambda <- (s$d ^ 2) / (nrow(X) - 1) + varex <- sum(lambda[1:ncol]) / sum(lambda) + tsmessage("PCA: ", ncol, " components explained ", formatC(varex * 100), + "% variance") + } + scores <- s$u %*% D + } + + if (ret_extra) { + list( + scores = scores, + lambda = lambda[1:ncol] + ) + } + else { + scores + } +} + +# Whiten the data by PCA. This both reduces the dimensionality, but also +# scales the scores by the inverse square root of the equivalent eigenvalue +# so that the variance of each column is 1. +pca_whiten <- function(X, ncol = min(dim(X)), eps = 1e-5, verbose = FALSE) { + pca <- pca_scores(X, ncol = ncol, verbose = verbose, ret_extra = TRUE) + sweep(pca$scores, 2, sqrt(pca$lambda + eps), "/") +} diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 3c6c1c6..d2932a6 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -1746,112 +1746,7 @@ pca_preprocess <- function(X, pca, whiten, initial_dims, verbose = FALSE) { X } -# Output Initialization --------------------------------------------------- - -# Initialization of the output coordinates -init_out <- function(Y_init, X, n, ndim, pca_preprocessed, verbose = FALSE) { - switch(Y_init, - pca = { - tsmessage("Initializing from PCA scores") - pca_init(X, ndim, pca_preprocessed, verbose) - }, - rand = { - tsmessage("Initializing from random Gaussian") - matrix(stats::rnorm(ndim * n, sd = 1), n) - } - ) -} -pca_init <- function(X, ndim, pca_preprocessed, verbose = FALSE) { - # If we've already done PCA, we can just take the first two columns - if (pca_preprocessed) { - X[, 1:2] - } - else { - pca_scores(X, ncol = ndim, verbose = verbose) - } -} - -# Laplacian Eigenmap (Belkin & Niyogi, 2002) -# Original formulation solves the generalized eigenvalue problem of the -# unnormalized graph Laplacian: L v = lambda D v, where L = D - A -# and uses the bottom eigenvectors v that result -# (ignoring the constant eigenvector associated with the smallest eigenvalue). -# -# This is equivalent to using the top eigenvectors from the usual -# eigendecomposition of a row-normalized Laplacian P = D^-1 A: P v = lambda' v -# so we don't need to depend on an external package for generalized eigenvalues. -# Note that while the eigenvectors are the same, the eigenvalues are -# different: lambda' = 1 - lambda, but we don't use them with Laplacian -# Eigenmaps anyway. -# -# As we only need to calculate the top ndim + 1 eigenvectors (i.e. normally 3) -# it's incredibly wasteful to calculate all of them. Therefore, if the -# RSpectra library is available, we use that instead, which allows for only the -# top eigenvectors to be extracted. Otherwise, use the slower eigen routine. 
-# A must be symmetric and positive semi definite, but not necessarily -# normalized in any specific way. -laplacian_eigenmap <- function(A, ndim = 2, use_RSpectra = TRUE) { - # Equivalent to: D <- diag(colSums(A)); M <- solve(D) %*% A - # This effectively row-normalizes A: colSums is normally faster than rowSums - # and because A is symmetric, they're equivalent - M <- A / colSums(A) - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, - warn.conflicts = FALSE)) { - tsmessage("Using RSpectra for eigenvectors") - Re(RSpectra::eigs(M, k = ndim + 1)$vectors[, 2:(ndim + 1)]) - } - else { - tsmessage("Using eigen for eigenvectors") - eigen(M, symmetric = FALSE)$vectors[, 2:(ndim + 1)] - } -} - -# Use a normalized Laplacian. The UMAP approach, taken from version 0.2.1. -normalized_spectral_init <- function(A, ndim = 2, use_RSpectra = TRUE) { - n <- nrow(A) - # Normalized Laplacian: clear and close to UMAP code, but very slow in R - # I <- diag(1, nrow = n, ncol = n) - # D <- diag(1 / sqrt(colSums(A))) - # L <- I - D %*% A %*% D - - # A lot faster (order of magnitude when n = 1000) - Dsq <- sqrt(colSums(A)) - L <- -t(A / Dsq) / Dsq - diag(L) <- 1 + diag(L) - - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, - warn.conflicts = FALSE)) { - tsmessage("Using RSpectra for eigenvectors") - k <- ndim + 1 - ncv <- max(2 * k + 1, floor(sqrt(n))) - opt <- list( - ncv = ncv, - maxitr = 5 * n, - tol = 1e-4 - ) - res <- RSpectra::eigs(L, k = k, which = "SM", opt = opt) - vec_indices <- rev(order(res$values, decreasing = TRUE)[1:ndim]) - res <- Re(res$vectors[, vec_indices]) - } - else { - tsmessage("Using eigen for eigenvectors") - res <- eigen(L, symmetric = FALSE) - vec_indices <- order(res$values, decreasing = FALSE)[2:(ndim + 1)] - res <- Re(res$vectors[, vec_indices]) - } - res -} - -is_spectral_init <- function(init) { - tolower(init) %in% c("laplacian", "normlaplacian") -} - -# Rescale embedding so that the standard deviation is the specified value. -# Default gives initialization like t-SNE, but not random. -shrink_coords <- function(X, sdev = 1e-4) { - scale(X, scale = apply(X, 2, stats::sd) / sdev) -} # Epoch Functions --------------------------------------------------------- @@ -2044,58 +1939,7 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = } } -# PCA --------------------------------------------------------------------- - - -# Calculates a matrix containing the first ncol columns of the PCA scores. 
-# Returns the score matrix unless ret_extra is TRUE, in which case a list -# is returned also containing the eigenvalues -pca_scores <- function(X, ncol = min(dim(X)), verbose = FALSE, - ret_extra = FALSE) { - if (methods::is(X, "dist")) { - res_mds <- stats::cmdscale(X, x.ret = TRUE, eig = TRUE, k = ncol) - - if (ret_extra || verbose) { - lambda <- res_mds$eig - varex <- sum(lambda[1:ncol]) / sum(lambda) - tsmessage("PCA (using classical MDS): ", ncol, " components explained ", - formatC(varex * 100), "% variance") - } - scores <- res_mds$points - } - else { - X <- scale(X, center = TRUE, scale = FALSE) - # do SVD on X directly rather than forming covariance matrix - s <- svd(X, nu = ncol, nv = 0) - D <- diag(c(s$d[1:ncol])) - if (verbose || ret_extra) { - # calculate eigenvalues of covariance matrix from singular values - lambda <- (s$d ^ 2) / (nrow(X) - 1) - varex <- sum(lambda[1:ncol]) / sum(lambda) - tsmessage("PCA: ", ncol, " components explained ", formatC(varex * 100), - "% variance") - } - scores <- s$u %*% D - } - - if (ret_extra) { - list( - scores = scores, - lambda = lambda[1:ncol] - ) - } - else { - scores - } -} -# Whiten the data by PCA. This both reduces the dimensionality, but also -# scales the scores by the inverse square root of the equivalent eigenvalue -# so that the variance of each column is 1. -pca_whiten <- function(X, ncol = min(dim(X)), eps = 1e-5, verbose = FALSE) { - pca <- pca_scores(X, ncol = ncol, verbose = verbose, ret_extra = TRUE) - sweep(pca$scores, 2, sqrt(pca$lambda + eps), "/") -} # UMAP ------------------------------------------------------------------- From 27fcc5ea4c62362ed7dae60288523c7ee8fa7c94 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:27:55 -0700 Subject: [PATCH 13/26] move umap code --- smallvis/R/cost.R | 149 ---------------- smallvis/R/smallvis.R | 183 -------------------- smallvis/R/umap.R | 386 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 386 insertions(+), 332 deletions(-) create mode 100644 smallvis/R/umap.R diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index db81356..d1a6768 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -176,155 +176,6 @@ largevis <- function(perplexity, inp_kernel = "gaussian", ) } -# UMAP -------------------------------------------------------------------- - -umap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", - spread = 1, min_dist = 0.001, gr_eps = 0.1, eps = 1e-9, - row_weight = NULL, n_threads = 0, use_cpp = FALSE) { - if (!is.null(row_weight)) { - row_normalize <- row_weight - } - else { - row_normalize <- FALSE - } - lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = FALSE, - row_normalize = row_normalize, n_threads = n_threads, - verbose = verbose, ret_extra = ret_extra, - use_cpp = use_cpp) - - cost <- init_ab(cost, spread = spread, min_dist = min_dist, - verbose = verbose) - - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps - cost$Cp <- colSums(P * log(P + eps) + (1 - P) * log1p(-P + eps)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - eps <- cost$eps - W <- cost$W - cost$pcost <- colSums(-P * log(W + eps) - (1 - P) * log1p(-W + eps)) + cost$Cp - cost - }, - gr = 
function(cost, Y) { - cost <- cost_update(cost, Y) - - cost$G <- k2g(Y, 4 * (cost$b / (cost$D2 + cost$eps + gr_eps)) * (cost$P - cost$W)) - cost - }, - update = function(cost, Y) { - D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - D2[D2 < 0] <- 0 - - W <- 1 / (1 + cost$a * D2 ^ cost$b) - diag(W) <- 0 - - cost$W <- W - cost$D2 <- D2 - cost - }, - export = cost_export - ) -} - -# UMAP with the output kernel fixed to the t-distribution -tumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", - gr_eps = 0.1, eps = 1e-9, row_weight = NULL, n_threads = 0, - use_cpp = FALSE) { - if (!is.null(row_weight)) { - row_normalize <- row_weight - } - else { - row_normalize <- FALSE - } - lreplace(umap(perplexity, n_threads = n_threads, use_cpp = use_cpp), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost$eps <- eps - - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = FALSE, - row_normalize = row_normalize, n_threads = n_threads, - verbose = verbose, ret_extra = ret_extra, - use_cpp = use_cpp) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - cost$G <- k2g(Y, 4 * (cost$W / ((1 - cost$W) + cost$eps + gr_eps)) * (cost$P - cost$W)) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - cost$W <- W - cost - } - ) -} - -# t-UMAP where output and input affinities are normalized -ntumap <- function(perplexity, inp_kernel = "skd", symmetrize = "umap", - gr_eps = 0.1, eps = 1e-9, n_threads = 0, use_cpp = FALSE) { - lreplace(tumap(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost$eps <- eps - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - n_threads = n_threads, verbose = verbose, - ret_extra = ret_extra, use_cpp = use_cpp) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - eps <- cost$eps - Q <- cost$Q - - cost$pcost <- colSums(-P * logm(Q, eps) - (1 - P) * log1p(-Q + eps)) + cost$Cp - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, 4 * cost$W * (cost$C - cost$sumC * cost$Q)) - cost - }, - update = function(cost, Y) { - P <- cost$P - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - Q <- W / sum(W) - C <- (P - Q) / (1 - Q) - sumC <- sum(C) - - cost$W <- W - cost$Q <- Q - cost$C <- C - cost$sumC <- sumC - - cost - }, - export = cost_export - ) -} # f-Divergences ----------------------------------------------------------- diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index d2932a6..4a3989f 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -1938,186 +1938,3 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = Y } } - - - -# UMAP ------------------------------------------------------------------- - -# Fits a kernel for the output distances of the form w = 1 / (1 + a dsq ^ b) -# where dsq is the squared Euclidean distance. -# Standard t-SNE function is a = 1, b = 1. -# Default UMAP values are a = 1.929, b = 0.7915. 
-find_ab_params <- function(spread = 1, min_dist = 0.001) { - xv <- seq(from = 0, to = spread * 3, length.out = 300) - yv <- rep(0, length(xv)) - yv[xv < min_dist] <- 1 - yv[xv >= min_dist] <- exp(-(xv[xv >= min_dist] - min_dist) / spread) - result <- try({ - stats::nls(yv ~ 1 / (1 + a * xv ^ (2 * b)), - start = list(a = 1, b = 1))$m$getPars() - }, silent = TRUE) - if (methods::is(result, "try-error")) { - stop("Can't find a, b for provided spread/min_dist values") - } - result -} - -# The UMAP equivalent of perplexity calibration in x2aff. k is continuous rather -# than integral and so is analogous to perplexity. -# Some differences: -# 1. The target value is the log2 of k, not the Shannon entropy associated -# with the desired perplexity. -# 2. Input weights are exponential, rather than Gaussian, with respect to the -# distances. The distances are also centered with respect to the smoothed -# distance to the nearest (non-zero distance) neighbor. A non-integral -# 'local_connectivity' value can result in this shortest distance between an -# interpolated value between two distances. -# 3. The weights are not normalized. Their raw sum is compared to the target -# value. -# 4. Distances beyond the k-nearest neighbors are not used in the calibration. -# The equivalent weights are set to 0. -# 5. Weights associated with distances shorter than the smoothed nearest -# neighbor distance are clamped to 1. -# This code has been converted from the original Python and may not be very -# idiomatic (or vectorizable). -# tol is SMOOTH_K_TOLERANCE in the Python code. -smooth_knn_distances <- - function(X, - k, - n_iter = 64, - local_connectivity = 1.0, - bandwidth = 1.0, - tol = 1e-5, - min_k_dist_scale = 1e-3, - cardinality = log2(k), - n_threads = 0, - verbose = FALSE) { - - tsmessage("Commencing smooth kNN distance calibration for k = ", formatC(k)) - - if (methods::is(X, "dist")) { - X <- as.matrix(X) - nn_idx <- t(apply(X, 2, order))[, 1:k] - nn_dist <- matrix(0, nrow = nrow(X), ncol = k) - for (i in 1:nrow(nn_idx)) { - nn_dist[i, ] <- X[i, nn_idx[i, ]] - } - } - else { - tsmessage("Finding ", k + 1, " nearest neighbors") - knn <- rnndescent::brute_force_knn(X, k = k, n_threads = n_threads) - knn$idx <- knn$idx[, 2:k] - knn$dist <- knn$dist[, 2:k] - - nn_idx <- matrix(nrow = nrow(X), ncol = k) - nn_idx[, 1] <- 1:nrow(nn_idx) - nn_idx[, 2:ncol(nn_idx)] <- knn$idx - nn_dist <- matrix(0, nrow = nrow(X), ncol = k) - nn_dist[, 2:ncol(nn_dist)] <- knn$dist - } - - n <- nrow(nn_dist) - target <- cardinality * bandwidth - rho <- rep(0, n) - sigma <- rep(0, n) - P <- matrix(0, nrow = n, ncol = n) - mean_distances <- NULL - - for (i in 1:n) { - lo <- 0.0 - hi <- Inf - mid <- 1.0 - - ith_distances <- nn_dist[i, ] - non_zero_dists <- ith_distances[ith_distances > 0.0] - if (length(non_zero_dists) >= local_connectivity) { - index <- floor(local_connectivity) - interpolation <- local_connectivity - index - if (index > 0) { - if (interpolation <= tol) { - rho[i] <- non_zero_dists[index] - } - else { - rho[i] <- non_zero_dists[index] + interpolation * - (non_zero_dists[index + 1] - non_zero_dists[index]) - } - } - else { - rho[i] <- interpolation * non_zero_dists[1] - } - } else if (length(non_zero_dists) > 0) { - rho[i] <- max(non_zero_dists) - } - else { - rho[i] <- 0.0 - } - - for (iter in 1:n_iter) { - psum <- 0.0 - for (j in 2:ncol(nn_dist)) { - dist <- max(0, (nn_dist[i, j] - rho[i])) - psum <- psum + exp(-(dist / mid)) - } - val <- psum - - if (abs(val - target) < tol) { - break - } - - if (val > target) { - 
hi <- mid - mid <- (lo + hi) / 2.0 - } - else { - lo <- mid - if (is.infinite(hi)) { - mid <- mid * 2 - } - else { - mid <- (lo + hi) / 2.0 - } - } - } - sigma[i] <- mid - - if (rho[i] > 0.0) { - sigma[i] <- max(sigma[i], min_k_dist_scale * mean(ith_distances)) - } - else { - if (is.null(mean_distances)) { - mean_distances <- mean(nn_dist) - } - sigma[i] <- max(sigma[i], min_k_dist_scale * mean_distances) - } - - prow <- exp(-(nn_dist[i, ] - rho[i]) / (sigma[i] * bandwidth)) - prow[nn_dist[i, ] - rho[i] <= 0] <- 1 - P[i, nn_idx[i, ]] <- prow - } - diag(P) <- 0 - - if (verbose) { - summarize(sigma, "sigma summary", verbose = verbose) - } - list(sigma = sigma, rho = rho, P = P) - } - - -# set_op_mix_ratio = between 0 and 1 mixes in fuzzy set intersection -# set to 0 for intersection only -fuzzy_set_union <- function(X, set_op_mix_ratio = 1) { - XX <- X * t(X) - set_op_mix_ratio * (X + t(X) - XX) + (1 - set_op_mix_ratio) * XX -} - -init_ab <- function(cost, spread = 1, min_dist = 0.001, verbose = FALSE) { - ab_params <- find_ab_params(spread = spread, min_dist = min_dist) - a <- ab_params[1] - b <- ab_params[2] - if (verbose) { - message("Umap curve parameters = ", formatC(a), ", ", formatC(b)) - } - cost$a <- a - cost$b <- b - cost -} diff --git a/smallvis/R/umap.R b/smallvis/R/umap.R new file mode 100644 index 0000000..ef8eacc --- /dev/null +++ b/smallvis/R/umap.R @@ -0,0 +1,386 @@ +# UMAP -------------------------------------------------------------------- + +umap <- function(perplexity, + inp_kernel = "skd", + symmetrize = "umap", + spread = 1, + min_dist = 0.001, + gr_eps = 0.1, + eps = 1e-9, + row_weight = NULL, + n_threads = 0, + use_cpp = FALSE) { + if (!is.null(row_weight)) { + row_normalize <- row_weight + } else { + row_normalize <- FALSE + } + lreplace( + tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = FALSE, + row_normalize = row_normalize, + n_threads = n_threads, + verbose = verbose, + ret_extra = ret_extra, + use_cpp = use_cpp + ) + + cost <- init_ab(cost, + spread = spread, + min_dist = min_dist, + verbose = verbose + ) + + cost$eps <- eps + cost + }, + cache_input = function(cost) { + P <- cost$P + eps <- cost$eps + cost$Cp <- colSums(P * log(P + eps) + (1 - P) * log1p(-P + eps)) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + P <- cost$P + eps <- cost$eps + W <- cost$W + cost$pcost <- colSums(-P * log(W + eps) - (1 - P) * log1p(-W + eps)) + cost$Cp + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + + cost$G <- k2g(Y, 4 * (cost$b / (cost$D2 + cost$eps + gr_eps)) * (cost$P - cost$W)) + cost + }, + update = function(cost, Y) { + D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + D2[D2 < 0] <- 0 + + W <- 1 / (1 + cost$a * D2^cost$b) + diag(W) <- 0 + + cost$W <- W + cost$D2 <- D2 + cost + }, + export = cost_export + ) +} + +# UMAP with the output kernel fixed to the t-distribution +tumap <- function(perplexity, + inp_kernel = "skd", + symmetrize = "umap", + gr_eps = 0.1, + eps = 1e-9, + row_weight = NULL, + n_threads = 0, + use_cpp = FALSE) { + if (!is.null(row_weight)) { + row_normalize <- row_weight + } else { + row_normalize <- FALSE + } + lreplace( + umap(perplexity, n_threads = n_threads, use_cpp = use_cpp), + init = function(cost, 
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost$eps <- eps
+
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = FALSE,
+        row_normalize = row_normalize,
+        n_threads = n_threads,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        use_cpp = use_cpp
+      )
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      cost$G <- k2g(Y, 4 * (cost$W / ((1 - cost$W) + cost$eps + gr_eps)) * (cost$P - cost$W))
+      cost
+    },
+    update = function(cost, Y) {
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- 1 / (1 + W)
+      diag(W) <- 0
+
+      cost$W <- W
+      cost
+    }
+  )
+}
+
+# t-UMAP where output and input affinities are normalized
+ntumap <- function(perplexity,
+                   inp_kernel = "skd",
+                   symmetrize = "umap",
+                   gr_eps = 0.1,
+                   eps = 1e-9,
+                   n_threads = 0,
+                   use_cpp = FALSE) {
+  lreplace(
+    tumap(perplexity, use_cpp = use_cpp, n_threads = n_threads),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost$eps <- eps
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = TRUE,
+        n_threads = n_threads,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        use_cpp = use_cpp
+      )
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      P <- cost$P
+      eps <- cost$eps
+      Q <- cost$Q
+
+      cost$pcost <- colSums(-P * logm(Q, eps) - (1 - P) * log1p(-Q + eps)) + cost$Cp
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$G <- k2g(Y, 4 * cost$W * (cost$C - cost$sumC * cost$Q))
+      cost
+    },
+    update = function(cost, Y) {
+      P <- cost$P
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- 1 / (1 + W)
+      diag(W) <- 0
+
+      Q <- W / sum(W)
+      C <- (P - Q) / (1 - Q)
+      sumC <- sum(C)
+
+      cost$W <- W
+      cost$Q <- Q
+      cost$C <- C
+      cost$sumC <- sumC
+
+      cost
+    },
+    export = cost_export
+  )
+}
+
+# Fits a kernel for the output distances of the form w = 1 / (1 + a dsq ^ b)
+# where dsq is the squared Euclidean distance.
+# Standard t-SNE function is a = 1, b = 1.
+# Default UMAP values are a = 1.929, b = 0.7915.
+find_ab_params <- function(spread = 1, min_dist = 0.001) {
+  xv <- seq(
+    from = 0,
+    to = spread * 3,
+    length.out = 300
+  )
+  yv <- rep(0, length(xv))
+  yv[xv < min_dist] <- 1
+  yv[xv >= min_dist] <- exp(-(xv[xv >= min_dist] - min_dist) / spread)
+  result <- try(
+    {
+      stats::nls(yv ~ 1 / (1 + a * xv^(2 * b)), start = list(a = 1, b = 1))$m$getPars()
+    },
+    silent = TRUE
+  )
+  if (methods::is(result, "try-error")) {
+    stop("Can't find a, b for provided spread/min_dist values")
+  }
+  result
+}
+
+# The UMAP equivalent of perplexity calibration in x2aff. k is continuous rather
+# than integral and so is analogous to perplexity.
+# Some differences:
+# 1. The target value is the log2 of k, not the Shannon entropy associated
+#    with the desired perplexity.
+# 2. Input weights are exponential, rather than Gaussian, with respect to the
+#    distances. The distances are also centered with respect to the smoothed
+#    distance to the nearest (non-zero distance) neighbor. A non-integral
+#    'local_connectivity' value can result in this shortest distance being an
+#    interpolated value between two distances.
+# 3. The weights are not normalized. Their raw sum is compared to the target
+#    value.
+# 4. 
Distances beyond the k-nearest neighbors are not used in the calibration. +# The equivalent weights are set to 0. +# 5. Weights associated with distances shorter than the smoothed nearest +# neighbor distance are clamped to 1. +# This code has been converted from the original Python and may not be very +# idiomatic (or vectorizable). +# tol is SMOOTH_K_TOLERANCE in the Python code. +smooth_knn_distances <- + function(X, + k, + n_iter = 64, + local_connectivity = 1.0, + bandwidth = 1.0, + tol = 1e-5, + min_k_dist_scale = 1e-3, + cardinality = log2(k), + n_threads = 0, + verbose = FALSE) { + tsmessage("Commencing smooth kNN distance calibration for k = ", formatC(k)) + + if (methods::is(X, "dist")) { + X <- as.matrix(X) + nn_idx <- t(apply(X, 2, order))[, 1:k] + nn_dist <- matrix(0, nrow = nrow(X), ncol = k) + for (i in 1:nrow(nn_idx)) { + nn_dist[i, ] <- X[i, nn_idx[i, ]] + } + } else { + tsmessage("Finding ", k + 1, " nearest neighbors") + knn <- rnndescent::brute_force_knn(X, k = k, n_threads = n_threads) + knn$idx <- knn$idx[, 2:k] + knn$dist <- knn$dist[, 2:k] + + nn_idx <- matrix(nrow = nrow(X), ncol = k) + nn_idx[, 1] <- 1:nrow(nn_idx) + nn_idx[, 2:ncol(nn_idx)] <- knn$idx + nn_dist <- matrix(0, nrow = nrow(X), ncol = k) + nn_dist[, 2:ncol(nn_dist)] <- knn$dist + } + + n <- nrow(nn_dist) + target <- cardinality * bandwidth + rho <- rep(0, n) + sigma <- rep(0, n) + P <- matrix(0, nrow = n, ncol = n) + mean_distances <- NULL + + for (i in 1:n) { + lo <- 0.0 + hi <- Inf + mid <- 1.0 + + ith_distances <- nn_dist[i, ] + non_zero_dists <- ith_distances[ith_distances > 0.0] + if (length(non_zero_dists) >= local_connectivity) { + index <- floor(local_connectivity) + interpolation <- local_connectivity - index + if (index > 0) { + if (interpolation <= tol) { + rho[i] <- non_zero_dists[index] + } else { + rho[i] <- non_zero_dists[index] + interpolation * + (non_zero_dists[index + 1] - non_zero_dists[index]) + } + } else { + rho[i] <- interpolation * non_zero_dists[1] + } + } else if (length(non_zero_dists) > 0) { + rho[i] <- max(non_zero_dists) + } else { + rho[i] <- 0.0 + } + + for (iter in 1:n_iter) { + psum <- 0.0 + for (j in 2:ncol(nn_dist)) { + dist <- max(0, (nn_dist[i, j] - rho[i])) + psum <- psum + exp(-(dist / mid)) + } + val <- psum + + if (abs(val - target) < tol) { + break + } + + if (val > target) { + hi <- mid + mid <- (lo + hi) / 2.0 + } else { + lo <- mid + if (is.infinite(hi)) { + mid <- mid * 2 + } else { + mid <- (lo + hi) / 2.0 + } + } + } + sigma[i] <- mid + + if (rho[i] > 0.0) { + sigma[i] <- max(sigma[i], min_k_dist_scale * mean(ith_distances)) + } else { + if (is.null(mean_distances)) { + mean_distances <- mean(nn_dist) + } + sigma[i] <- max(sigma[i], min_k_dist_scale * mean_distances) + } + + prow <- exp(-(nn_dist[i, ] - rho[i]) / (sigma[i] * bandwidth)) + prow[nn_dist[i, ] - rho[i] <= 0] <- 1 + P[i, nn_idx[i, ]] <- prow + } + diag(P) <- 0 + + if (verbose) { + summarize(sigma, "sigma summary", verbose = verbose) + } + list(sigma = sigma, rho = rho, P = P) + } + + +# set_op_mix_ratio = between 0 and 1 mixes in fuzzy set intersection +# set to 0 for intersection only +fuzzy_set_union <- function(X, set_op_mix_ratio = 1) { + XX <- X * t(X) + set_op_mix_ratio * (X + t(X) - XX) + (1 - set_op_mix_ratio) * XX +} + +init_ab <- function(cost, + spread = 1, + min_dist = 0.001, + verbose = FALSE) { + ab_params <- find_ab_params(spread = spread, min_dist = min_dist) + a <- ab_params[1] + b <- ab_params[2] + if (verbose) { + message("Umap curve parameters = ", formatC(a), ", ", 
formatC(b)) + } + cost$a <- a + cost$b <- b + cost +} From 6436bd2b7433c93dee6f9767ebefe5c120c27bee Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:30:11 -0700 Subject: [PATCH 14/26] formatting --- smallvis/R/init.R | 112 +++++++++++-------- smallvis/R/perplexity.R | 233 +++++++++++++++++++++++----------------- smallvis/R/util.R | 46 ++++---- 3 files changed, 228 insertions(+), 163 deletions(-) diff --git a/smallvis/R/init.R b/smallvis/R/init.R index 2c83c43..2272da4 100644 --- a/smallvis/R/init.R +++ b/smallvis/R/init.R @@ -1,16 +1,21 @@ # Output Initialization --------------------------------------------------- # Initialization of the output coordinates -init_out <- function(Y_init, X, n, ndim, pca_preprocessed, verbose = FALSE) { +init_out <- function(Y_init, + X, + n, + ndim, + pca_preprocessed, + verbose = FALSE) { switch(Y_init, - pca = { - tsmessage("Initializing from PCA scores") - pca_init(X, ndim, pca_preprocessed, verbose) - }, - rand = { - tsmessage("Initializing from random Gaussian") - matrix(stats::rnorm(ndim * n, sd = 1), n) - } + pca = { + tsmessage("Initializing from PCA scores") + pca_init(X, ndim, pca_preprocessed, verbose) + }, + rand = { + tsmessage("Initializing from random Gaussian") + matrix(stats::rnorm(ndim * n, sd = 1), n) + } ) } @@ -18,8 +23,7 @@ pca_init <- function(X, ndim, pca_preprocessed, verbose = FALSE) { # If we've already done PCA, we can just take the first two columns if (pca_preprocessed) { X[, 1:2] - } - else { + } else { pca_scores(X, ncol = ndim, verbose = verbose) } } @@ -43,37 +47,38 @@ pca_init <- function(X, ndim, pca_preprocessed, verbose = FALSE) { # top eigenvectors to be extracted. Otherwise, use the slower eigen routine. # A must be symmetric and positive semi definite, but not necessarily # normalized in any specific way. -laplacian_eigenmap <- function(A, ndim = 2, use_RSpectra = TRUE) { +laplacian_eigenmap <- function(A, + ndim = 2, + use_RSpectra = TRUE) { # Equivalent to: D <- diag(colSums(A)); M <- solve(D) %*% A # This effectively row-normalizes A: colSums is normally faster than rowSums # and because A is symmetric, they're equivalent M <- A / colSums(A) - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, - warn.conflicts = FALSE)) { + if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { tsmessage("Using RSpectra for eigenvectors") Re(RSpectra::eigs(M, k = ndim + 1)$vectors[, 2:(ndim + 1)]) - } - else { + } else { tsmessage("Using eigen for eigenvectors") eigen(M, symmetric = FALSE)$vectors[, 2:(ndim + 1)] } } # Use a normalized Laplacian. The UMAP approach, taken from version 0.2.1. 
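
# For orientation before the implementation that follows: the fast form used
# there computes the normalized Laplacian L = I - D^(-1/2) A D^(-1/2) with
# row/column scaling instead of forming diagonal matrices. A standalone
# check on a toy symmetric affinity matrix (base R only; not package code):
set.seed(42)
A <- matrix(runif(25), nrow = 5)
A <- (A + t(A)) / 2 # symmetrize
diag(A) <- 0
Dsq <- sqrt(colSums(A))
L_fast <- -t(A / Dsq) / Dsq
diag(L_fast) <- 1 + diag(L_fast)
L_slow <- diag(5) - diag(1 / Dsq) %*% A %*% diag(1 / Dsq)
stopifnot(isTRUE(all.equal(L_fast, L_slow)))
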
-normalized_spectral_init <- function(A, ndim = 2, use_RSpectra = TRUE) { +normalized_spectral_init <- function(A, + ndim = 2, + use_RSpectra = TRUE) { n <- nrow(A) # Normalized Laplacian: clear and close to UMAP code, but very slow in R # I <- diag(1, nrow = n, ncol = n) # D <- diag(1 / sqrt(colSums(A))) # L <- I - D %*% A %*% D - + # A lot faster (order of magnitude when n = 1000) Dsq <- sqrt(colSums(A)) L <- -t(A / Dsq) / Dsq diag(L) <- 1 + diag(L) - - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, - warn.conflicts = FALSE)) { + + if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { tsmessage("Using RSpectra for eigenvectors") k <- ndim + 1 ncv <- max(2 * k + 1, floor(sqrt(n))) @@ -82,11 +87,14 @@ normalized_spectral_init <- function(A, ndim = 2, use_RSpectra = TRUE) { maxitr = 5 * n, tol = 1e-4 ) - res <- RSpectra::eigs(L, k = k, which = "SM", opt = opt) + res <- RSpectra::eigs(L, + k = k, + which = "SM", + opt = opt + ) vec_indices <- rev(order(res$values, decreasing = TRUE)[1:ndim]) res <- Re(res$vectors[, vec_indices]) - } - else { + } else { tsmessage("Using eigen for eigenvectors") res <- eigen(L, symmetric = FALSE) vec_indices <- order(res$values, decreasing = FALSE)[2:(ndim + 1)] @@ -111,41 +119,52 @@ shrink_coords <- function(X, sdev = 1e-4) { # Calculates a matrix containing the first ncol columns of the PCA scores. # Returns the score matrix unless ret_extra is TRUE, in which case a list # is returned also containing the eigenvalues -pca_scores <- function(X, ncol = min(dim(X)), verbose = FALSE, +pca_scores <- function(X, + ncol = min(dim(X)), + verbose = FALSE, ret_extra = FALSE) { if (methods::is(X, "dist")) { - res_mds <- stats::cmdscale(X, x.ret = TRUE, eig = TRUE, k = ncol) - + res_mds <- stats::cmdscale(X, + x.ret = TRUE, + eig = TRUE, + k = ncol + ) + if (ret_extra || verbose) { lambda <- res_mds$eig varex <- sum(lambda[1:ncol]) / sum(lambda) - tsmessage("PCA (using classical MDS): ", ncol, " components explained ", - formatC(varex * 100), "% variance") + tsmessage( + "PCA (using classical MDS): ", + ncol, + " components explained ", + formatC(varex * 100), + "% variance" + ) } scores <- res_mds$points - } - else { + } else { X <- scale(X, center = TRUE, scale = FALSE) # do SVD on X directly rather than forming covariance matrix s <- svd(X, nu = ncol, nv = 0) D <- diag(c(s$d[1:ncol])) if (verbose || ret_extra) { # calculate eigenvalues of covariance matrix from singular values - lambda <- (s$d ^ 2) / (nrow(X) - 1) + lambda <- (s$d^2) / (nrow(X) - 1) varex <- sum(lambda[1:ncol]) / sum(lambda) - tsmessage("PCA: ", ncol, " components explained ", formatC(varex * 100), - "% variance") + tsmessage( + "PCA: ", + ncol, + " components explained ", + formatC(varex * 100), + "% variance" + ) } scores <- s$u %*% D } - + if (ret_extra) { - list( - scores = scores, - lambda = lambda[1:ncol] - ) - } - else { + list(scores = scores, lambda = lambda[1:ncol]) + } else { scores } } @@ -153,7 +172,14 @@ pca_scores <- function(X, ncol = min(dim(X)), verbose = FALSE, # Whiten the data by PCA. This both reduces the dimensionality, but also # scales the scores by the inverse square root of the equivalent eigenvalue # so that the variance of each column is 1. 
-pca_whiten <- function(X, ncol = min(dim(X)), eps = 1e-5, verbose = FALSE) { - pca <- pca_scores(X, ncol = ncol, verbose = verbose, ret_extra = TRUE) +pca_whiten <- function(X, + ncol = min(dim(X)), + eps = 1e-5, + verbose = FALSE) { + pca <- pca_scores(X, + ncol = ncol, + verbose = verbose, + ret_extra = TRUE + ) sweep(pca$scores, 2, sqrt(pca$lambda + eps), "/") } diff --git a/smallvis/R/perplexity.R b/smallvis/R/perplexity.R index 9dfe5d0..3e76e31 100644 --- a/smallvis/R/perplexity.R +++ b/smallvis/R/perplexity.R @@ -7,48 +7,49 @@ # NB set default kernel to "exp" to get results closer to the tsne package. # This differs from the procedure in the t-SNE paper by exponentially weighting # the distances, rather than the squared distances. -x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", - verbose = FALSE, guesses = NULL) { +x2aff <- function(X, + perplexity = 15, + tol = 1e-5, + kernel = "gauss", + verbose = FALSE, + guesses = NULL) { x_is_dist <- methods::is(X, "dist") if (x_is_dist) { D <- X n <- attr(D, "Size") - + D <- as.matrix(D) if (kernel == "gauss") { D <- D * D } - } - else { + } else { XX <- rowSums(X * X) n <- nrow(X) } - + nperps <- length(perplexity) if (nperps > 1 && nperps != n) { stop("Must provide one perplexity per point") } - + if (!is.null(guesses) && length(guesses) != n) { stop("Initial guess vector must match number of observations in X") } - + W <- matrix(0, n, n) intd <- rep(0, n) if (!is.null(guesses)) { beta <- guesses - } - else { + } else { beta <- rep(1, n) } if (nperps == 1) { logU <- log(perplexity) - } - else { + } else { perps <- perplexity } bad_perp <- 0 - + for (i in 1:n) { if (nperps > 1) { perplexity <- perps[i] @@ -56,18 +57,17 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", } betamin <- -Inf betamax <- Inf - + if (x_is_dist) { Di <- D[i, -i] - } - else { + } else { Di <- (XX[i] + XX - 2 * colSums(tcrossprod(X[i, ], X)))[-i] Di[Di < 0] <- 0 if (kernel == "exp") { Di <- sqrt(Di) } } - + # If we haven't been provided with guesses, then try the initialization used # for all points in ELKI according to Schubert & Gertz in "Intrinsic # t-Stochastic Neighbor Embedding for Visualization and Outlier Detection: A @@ -77,12 +77,12 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", if (is.null(guesses) && i == 1) { beta[1] <- 0.5 * perplexity / mean(Di) } - + sres <- shannon(Di, beta[i]) H <- sres$H Wi <- sres$W sumWi <- sres$Z - + Hdiff <- H - logU tries <- 0 while (abs(Hdiff) > tol && tries < 50) { @@ -101,12 +101,12 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", beta[i] <- (beta[i] + betamin) / 2 } } - + sres <- shannon(Di, beta[i]) H <- sres$H Wi <- sres$W sumWi <- sres$Z - + Hdiff <- H - logU tries <- tries + 1 } @@ -114,14 +114,13 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", # Put a weight of 1/perplexity on the perplexity-nearest neighbors bad_perp <- bad_perp + 1 knn_idx <- order(Di, decreasing = FALSE)[1:max(floor(perplexity), 1)] - knn_idx[knn_idx >= i] <- knn_idx[knn_idx >= i] + 1 + knn_idx[knn_idx >= i] <- knn_idx[knn_idx >= i] + 1 Wi <- rep(0, n) Wi[knn_idx] <- 1 / floor(perplexity) - + intd[i] <- 0 W[i, ] <- Wi - } - else { + } else { # if we didn't supply estimates for beta manually, then initialize guess for # next point with optimized beta for this point: doesn't save many # iterations, but why not? 
@@ -131,15 +130,14 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", intd[i] <- intd_x2aff(Di, beta[i], Wi, sumWi, H) W[i, -i] <- Wi } - } sigma <- sqrt(1 / beta) - + if (bad_perp > 0) { tsmessage("Warning: ", bad_perp, " perplexity calibrations failed!") warning(bad_perp, " perplexity calibrations failed") } - + if (verbose) { summarize(sigma, "sigma summary", verbose = verbose) summarize(intd, "Dint", verbose = verbose) @@ -156,37 +154,34 @@ x2aff <- function(X, perplexity = 15, tol = 1e-5, kernel = "gauss", shannon <- function(D2, beta) { W <- exp(-D2 * beta) Z <- sum(W) - + if (Z == 0) { H <- 0 - } - else { + } else { H <- log(Z) + beta * sum(D2 * W) / Z } - list( - W = W, - Z = Z, - H = H - ) + list(W = W, Z = Z, H = H) } -x2aff_sigma <- function(X, sigma = 1e-3, verbose = FALSE, use_cpp = FALSE, +x2aff_sigma <- function(X, + sigma = 1e-3, + verbose = FALSE, + use_cpp = FALSE, n_threads = 1) { x_is_dist <- methods::is(X, "dist") if (x_is_dist) { D <- X - + D <- as.matrix(D) D <- D * D - } - else { + } else { D <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) } beta <- 1 / (sigma * sigma) sres <- shannon(D, beta) W <- sres$W diag(W) <- 0 - + list(W = W, beta = beta) } @@ -209,8 +204,7 @@ knn_dist <- function(X, k, n_threads, verbose) { Di[Di > kdists[i]] <- Inf D[, i] <- Di } - } - else { + } else { # Find the k-nearest indexes and distances of X, and set the corresponding # distance matrix elements n <- nrow(X) @@ -221,14 +215,14 @@ knn_dist <- function(X, k, n_threads, verbose) { knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) knn$idx <- knn$idx[, 2:(k + 1)] knn$dist <- knn$dist[, 2:(k + 1)] - + D <- matrix(Inf, nrow = n, ncol = n) diag(D) <- 0 for (i in 1:n) { D[i, knn$idx[i, ]] <- knn$dist[i, ] } } - + # symmetrize pmin(D, t(D)) } @@ -250,19 +244,18 @@ knn_graph <- function(X, k, n_threads, verbose) { Di[Di > kdists[i]] <- 0 D[, i] <- 1 } - } - else { + } else { # Find the k-nearest indexes and distances of X, and set the corresponding # distance matrix elements n <- nrow(X) if (k > n - 1) { stop("k must be not be > n - 1") } - + tsmessage("Finding ", k + 1, " nearest neighbors") knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) knn$idx <- knn$idx[, 2:(k + 1)] - + D <- matrix(0, nrow = n, ncol = n) for (i in 1:n) { D[i, knn$idx[i, ]] <- 1 @@ -273,19 +266,26 @@ knn_graph <- function(X, k, n_threads, verbose) { # Given data X and k nearest neighbors, return a geodisic distance matrix # Disconnections are treated by using the Euclidean distance. 
-geodesic <- function(X, k, fill = TRUE, use_cpp = FALSE, n_threads = 0, +geodesic <- function(X, + k, + fill = TRUE, + use_cpp = FALSE, + n_threads = 0, verbose = FALSE) { tsmessage("Calculating geodesic distances with k = ", k) - + R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose) # The hard work is done by Rfast's implementation of Floyd's algorithm G <- Rfast::floyd(R) if (any(is.infinite(G)) && fill) { - tsmessage("k = ", k, " resulted in disconnections: filling with Euclidean distances") + tsmessage( + "k = ", + k, + " resulted in disconnections: filling with Euclidean distances" + ) if (methods::is(X, "dist")) { R <- as.matrix(X) - } - else { + } else { R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) } G[is.infinite(G)] <- R[is.infinite(G)] @@ -293,54 +293,73 @@ geodesic <- function(X, k, fill = TRUE, use_cpp = FALSE, n_threads = 0, G } -# Multiscale perplexities: P is an average over the results of multiple +# Multiscale perplexities: P is an average over the results of multiple # perplexities # as described by de Bodt et al in # Perplexity-free t-SNE and twice Student tt-SNE (2018) -msp <- function(X, perplexities = NULL, tol = 1e-5, - symmetrize = "symmetric", row_normalize = TRUE, +msp <- function(X, + perplexities = NULL, + tol = 1e-5, + symmetrize = "symmetric", + row_normalize = TRUE, normalize = TRUE, - verbose = FALSE, guesses = NULL) { + verbose = FALSE, + guesses = NULL) { if (methods::is(X, "dist")) { n <- attr(X, "Size") - } - else { + } else { n <- nrow(X) } - + if (is.null(perplexities)) { perplexities <- idp_perps(n) } - tsmessage("Calculating multi-scale P with perplexities from ", - formatC(perplexities[1]), " to ", formatC(last(perplexities))) - + tsmessage( + "Calculating multi-scale P with perplexities from ", + formatC(perplexities[1]), + " to ", + formatC(last(perplexities)) + ) + res <- NULL for (perplexity in perplexities) { - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) - x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss", - verbose = verbose, guesses = guesses) + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity) + ) + x2a_res <- x2aff( + X = X, + perplexity = perplexity, + tol = tol, + kernel = "gauss", + verbose = verbose, + guesses = guesses + ) P <- x2a_res$W - Q <- scale_affinities(P, - symmetrize = "symmetric", row_normalize = TRUE, - normalize = TRUE) + Q <- scale_affinities( + P, + symmetrize = "symmetric", + row_normalize = TRUE, + normalize = TRUE + ) if (is.null(res)) { res$P <- Q - } - else { + } else { res$P <- res$P + Q } } - + if (length(perplexities) > 1) { res$P <- res$P / length(perplexities) } - + if (is.logical(row_normalize)) { - tsmessage("Effective perplexity of multiscale P approx = ", - formatC(stats::median(perpp(res$P)))) + tsmessage( + "Effective perplexity of multiscale P approx = ", + formatC(stats::median(perpp(res$P))) + ) } - + res } @@ -348,38 +367,51 @@ msp <- function(X, perplexities = NULL, tol = 1e-5, # Scan through the provided perplexities and use the result which maximizes # the mean correlation dimension (which is an estimate for the intrinsic # dimensionality). Stops at the first maxmimum found. 
-idp <- function(X, perplexities = NULL, tol = 1e-5, - verbose = FALSE, guesses = NULL) { +idp <- function(X, + perplexities = NULL, + tol = 1e-5, + verbose = FALSE, + guesses = NULL) { if (methods::is(X, "dist")) { n <- attr(X, "Size") - } - else { + } else { n <- nrow(X) } - + if (is.null(perplexities)) { perplexities <- idp_perps(n) } if (verbose) { - tsmessage("Searching for intrinsic dimensionality with perplexities from ", - formatC(perplexities[1]), " to ", formatC(last(perplexities))) + tsmessage( + "Searching for intrinsic dimensionality with perplexities from ", + formatC(perplexities[1]), + " to ", + formatC(last(perplexities)) + ) } - + corr_dim_max <- -Inf idp <- 0 idp_res <- NULL for (perplexity in perplexities) { if (verbose) { - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity) + ) } - x2a_res <- x2aff(X = X, perplexity = perplexity, tol = tol, kernel = "gauss", - verbose = verbose, guesses = guesses) + x2a_res <- x2aff( + X = X, + perplexity = perplexity, + tol = tol, + kernel = "gauss", + verbose = verbose, + guesses = guesses + ) corr_dim <- mean(x2a_res$dint) if (corr_dim <= corr_dim_max) { break - } - else { + } else { corr_dim_max <- corr_dim idp <- perplexity idp_res <- x2a_res @@ -389,10 +421,14 @@ idp <- function(X, perplexities = NULL, tol = 1e-5, stop("Unable to find an IDP: all correlation dimensions were -ve") } if (verbose) { - tsmessage("Found IDP at perplexity = ", formatC(idp), - " intrinsic dimensionality = ", formatC(corr_dim_max)) + tsmessage( + "Found IDP at perplexity = ", + formatC(idp), + " intrinsic dimensionality = ", + formatC(corr_dim_max) + ) } - + idp_res$idp <- idp idp_res } @@ -405,7 +441,7 @@ idp_perps <- function(n) { max_u <- min(128, max(2, ceiling(n / 2))) max_uexp <- floor(log2(max_u)) min_uexp <- min(2, max_uexp) - 2 ^ (min_uexp:max_uexp) + 2^(min_uexp:max_uexp) } # Is the perplexity argument a string or a list with the first element is a @@ -415,8 +451,7 @@ perp_method <- function(perplexity) { if (is.character(perplexity) || is.list(perplexity)) { if (is.list(perplexity)) { method <- perplexity[[1]] - } - else { + } else { method <- perplexity } } @@ -431,4 +466,4 @@ user_idp_perps <- function(perplexity) { perplexities <- perplexity[[2]] } perplexities -} \ No newline at end of file +} diff --git a/smallvis/R/util.R b/smallvis/R/util.R index 9d0efc6..01d2c74 100644 --- a/smallvis/R/util.R +++ b/smallvis/R/util.R @@ -8,20 +8,22 @@ dist2 <- function(X) { D2 + sweep(X %*% t(X) * -2, 2, t(D2), `+`) } -calc_d2 <- function(X, use_cpp = FALSE, n_threads = 1) { +calc_d2 <- function(X, + use_cpp = FALSE, + n_threads = 1) { if (use_cpp) { dist2_cpp(X, n_threads = n_threads) - } - else { + } else { safe_dist2(X) } } -calc_d <- function(X, use_cpp = FALSE, n_threads = 1) { +calc_d <- function(X, + use_cpp = FALSE, + n_threads = 1) { if (use_cpp) { dist_cpp(X, n_threads = n_threads) - } - else { + } else { sqrt(safe_dist2(X)) } } @@ -33,20 +35,22 @@ safe_dist2 <- function(X) { D2 } -calc_d2tweight <- function(D2, use_cpp = FALSE, n_threads = 1) { +calc_d2tweight <- function(D2, + use_cpp = FALSE, + n_threads = 1) { if (use_cpp) { d2_to_tweight_cpp(D2, n_threads = n_threads) - } - else { + } else { 1 / (1 + D2) } } -calc_tweight <- function(X, use_cpp = FALSE, n_threads = 1) { +calc_tweight <- function(X, + use_cpp = FALSE, + n_threads = 1) { if (use_cpp) { tweight_cpp(X, n_threads = n_threads) - } - else { + } else { D2 <- 
calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) 1 / (1 + D2) } @@ -63,10 +67,13 @@ stime <- function() { } # message with a time stamp -tsmessage <- function(..., domain = NULL, appendLF = TRUE, force = FALSE, +tsmessage <- function(..., + domain = NULL, + appendLF = TRUE, + force = FALSE, time_stamp = TRUE) { verbose <- get0("verbose", envir = sys.parent()) - + if (force || (!is.null(verbose) && verbose)) { msg <- "" if (time_stamp) { @@ -115,18 +122,15 @@ nnat <- function(x) { # log vector information summarize <- function(X, msg = "", verbose = FALSE) { summary_X <- summary(X, digits = max(3, getOption("digits") - 3)) - tsmessage(msg, ": ", paste(names(summary_X), ":", summary_X, "|", - collapse = "")) + tsmessage(msg, ": ", paste(names(summary_X), ":", summary_X, "|", collapse = "")) } # Format perplexity as a string. Could be a scalar or a vector. In the latter # case, just list the first two values and then ellipses format_perps <- function(perplexity) { if (length(perplexity) > 1) { - paste0(formatC(perplexity[1]), ", ", - formatC(perplexity[2]), "...") - } - else { + paste0(formatC(perplexity[1]), ", ", formatC(perplexity[2]), "...") + } else { formatC(perplexity) } } @@ -139,4 +143,4 @@ last <- function(x) { # remove NULL items from a list remove_nulls <- function(l) { l[!vapply(l, is.null, logical(1))] -} \ No newline at end of file +} From d9a76f67952d751a01084e86ca2906f2c6164551 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:39:14 -0700 Subject: [PATCH 15/26] move distance-preserving code --- smallvis/R/cost.R | 277 ---------------------------- smallvis/R/mmds.R | 395 ++++++++++++++++++++++++++++++++++++++++ smallvis/R/perplexity.R | 29 +-- 3 files changed, 396 insertions(+), 305 deletions(-) create mode 100644 smallvis/R/mmds.R diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index d1a6768..d929468 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -933,283 +933,6 @@ gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian", ) } - -# Distance Preserving Methods --------------------------------------------- - -mmds_init <- function(cost, X, max_iter, eps = .Machine$double.eps, - verbose = FALSE, ret_extra = c(), use_cpp = FALSE, - n_threads = 1) { - tsmessage("Calculating pairwise distances") - if (methods::is(X, "dist")) { - cost$R <- as.matrix(X) - } - else { - cost$R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) - } - cost$eps <- eps - cost -} - -# Metric MDS, minimizing strain. 
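
# Concretely, the cost minimized here is the residual sum of squares between
# input and output distances. A standalone evaluation on toy data (base R's
# dist() standing in for the package's calc_d helper):
set.seed(42)
X <- matrix(rnorm(20), nrow = 10) # input data
Y <- matrix(rnorm(20), nrow = 10) # candidate embedding
R <- as.matrix(dist(X))
D <- as.matrix(dist(Y))
sum((R - D)^2) # the cost that mmds drives down
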
-mmds <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { - list( - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, - use_cpp = use_cpp, n_threads = n_threads) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- colSums((cost$R - cost$D) ^ 2) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - if (use_cpp) { - cost$G <- mmds_grad_cpp(cost$R, cost$D, Y, eps = eps, n_threads = n_threads) - } - else { - cost$G <- k2g(Y, -4 * (cost$R - cost$D) / (cost$D + cost$eps)) - } - cost - }, - update = function(cost, Y) { - cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) - cost - }, - sentinel = "D", - export = function(cost, val) { - res <- NULL - switch(val, - dx = { - res <- cost$R - }, - dy = { - res <- cost$D - } - ) - res - } - ) -} - -smmds <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { - lreplace( - mmds(use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, eps = .Machine$double.eps, verbose = FALSE, - ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, - use_cpp = use_cpp, n_threads = n_threads) - cost$R2 <- cost$R * cost$R - cost$R <- NULL - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- colSums((cost$R2 - cost$D2) ^ 2) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, -8 * (cost$R2 - cost$D2)) - cost - }, - export = function(cost, val) { - res <- NULL - switch(val, - dx = { - res <- cost$R2 - res[res < 0] <- 0 - sqrt(res) - }, - dy = { - res <- cost$D2 - res[res < 0] <- 0 - sqrt(res) - } - ) - res - }, - update = function(cost, Y) { - cost$D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - cost - }, - sentinel = "D2" - ) -} - -sammon <- function(eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 1) { - lreplace(mmds(eps = eps, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost, X, max_iter, eps, verbose, ret_extra, - use_cpp = use_cpp, n_threads = n_threads) - cost$rsum_inv <- 1 / sum(cost$R) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- colSums((cost$R - cost$D) ^ 2 / (cost$R + cost$eps)) * cost$rsum_inv - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, -4 * cost$rsum_inv * (cost$R - cost$D) / (cost$R * cost$D + cost$eps)) - cost - } - ) -} - - -gmmds <- function(k, eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 0) { - lreplace( - mmds(use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost$R <- geodesic(X, k, n_threads = n_threads, use_cpp = use_cpp, - verbose = verbose) - cost$eps <- eps - cost - }, - export = function(cost, val) { - res <- NULL - switch(val, - geo = { - res <- cost$R - }, - dy = { - res <- cost$D - } - ) - res - } - ) -} - -# Define neighborhoods using a radius based on a fraction (f) of all input -# distances (sorted by increasing length), don't correct non-neighborhood -# distances unless they smaller than the input distance -ballmmds <- function(f = 0.1, eps = .Machine$double.eps, use_cpp = FALSE, - n_threads = 1) { - lreplace( - mmds(use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost = cost, 
X = X, max_iter = max_iter, eps = eps, - verbose = verbose, ret_extra = ret_extra, - use_cpp = use_cpp, n_threads = n_threads) - rs <- cost$R[upper.tri(cost$R)] - rmax <- Rfast::nth(rs, max(1, round(f * length(rs)))) - if (verbose) { - tsmessage("f = ", formatC(f), " rmax = ", formatC(rmax)) - } - cost$rmax <- rmax - - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - R <- cost$R - D <- cost$D - rmax <- cost$rmax - Ddiff <- R - D - Ddiff[R > rmax & D > R] <- 0 - cost$pcost <- colSums(Ddiff * Ddiff) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - eps <- cost$eps - R <- cost$R - D <- cost$D - - K <- -4 * (R - D) / (D + eps) - - rmax <- cost$rmax - K[R > rmax & D > R] <- 0 - - cost$G <- k2g(Y, K) - cost - }, - export = function(cost, val) { - res <- NULL - switch(val, - geo = { - res <- cost$R - }, - dy = { - res <- cost$D - } - ) - res - }, - update = function(cost, Y) { - cost$D <- calc_d(Y) - cost - } - ) -} - -# Create the symmetrized knn graph, don't correct non-neighborhood distances -# unless they smaller than the input distance -knnmmds <- function(k, eps = .Machine$double.eps, use_cpp = FALSE, - n_threads = 0) { - lreplace( - mmds(use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- mmds_init(cost = cost, X = X, max_iter = max_iter, eps = eps, - verbose = verbose, ret_extra = ret_extra, - use_cpp = use_cpp, n_threads = n_threads) - knn <- knn_graph(X = X, k = k, n_threads = n_threads, verbose = verbose) - # symmetrize - cost$knn <- pmax(knn, t(knn)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - R <- cost$R - D <- cost$D - knn <- cost$knn - Ddiff <- R - D - Ddiff[knn == 0 & D > R] <- 0 - cost$pcost <- colSums(Ddiff * Ddiff) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - eps <- cost$eps - R <- cost$R - D <- cost$D - - K <- -4 * (R - D) / (D + eps) - - knn <- cost$knn - K[knn == 0 & D > R] <- 0 - - cost$G <- k2g(Y, K) - cost$D <- D - cost - }, - export = function(cost, val) { - res <- NULL - switch(val, - geo = { - res <- cost$R - }, - dy = { - res <- cost$D - } - ) - res - }, - update = function(cost, Y) { - cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) - cost - } - ) -} - # Elastic Embedding ------------------------------------------------------- # Carreira-Perpinán, M. A. (2010, June). diff --git a/smallvis/R/mmds.R b/smallvis/R/mmds.R new file mode 100644 index 0000000..032df7a --- /dev/null +++ b/smallvis/R/mmds.R @@ -0,0 +1,395 @@ +# Distance Preserving Methods --------------------------------------------- + +mmds_init <- function(cost, + X, + max_iter, + eps = .Machine$double.eps, + verbose = FALSE, + ret_extra = c(), + use_cpp = FALSE, + n_threads = 1) { + tsmessage("Calculating pairwise distances") + if (methods::is(X, "dist")) { + cost$R <- as.matrix(X) + } else { + cost$R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) + } + cost$eps <- eps + cost +} + +# Metric MDS, minimizing strain. 
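
# The -4 * (R - D) / D factor in the gradient below combines
# d/dd (r - d)^2 = -2 * (r - d), the chain rule through d = |y_i - y_j|,
# and a factor of 2 because the symmetric cost sums each pair twice. A
# finite-difference check of the core identity (base R, toy numbers):
r <- 2 # fixed input distance
stress <- function(d) (r - d)^2
d <- 1.5
(stress(d + 1e-6) - stress(d - 1e-6)) / 2e-6 # numeric: ~ -1
-2 * (r - d) # analytic: -1
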
+mmds <- function(eps = .Machine$double.eps,
+                 use_cpp = FALSE,
+                 n_threads = 1) {
+  list(
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- mmds_init(
+        cost,
+        X,
+        max_iter,
+        eps,
+        verbose,
+        ret_extra,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$pcost <- colSums((cost$R - cost$D)^2)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      if (use_cpp) {
+        cost$G <- mmds_grad_cpp(cost$R, cost$D, Y, eps = eps, n_threads = n_threads)
+      } else {
+        cost$G <- k2g(Y, -4 * (cost$R - cost$D) / (cost$D + cost$eps))
+      }
+      cost
+    },
+    update = function(cost, Y) {
+      cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads)
+      cost
+    },
+    sentinel = "D",
+    export = function(cost, val) {
+      res <- NULL
+      switch(val,
+        dx = {
+          res <- cost$R
+        },
+        dy = {
+          res <- cost$D
+        }
+      )
+      res
+    }
+  )
+}
+
+smmds <- function(eps = .Machine$double.eps,
+                  use_cpp = FALSE,
+                  n_threads = 1) {
+  lreplace(
+    mmds(use_cpp = use_cpp, n_threads = n_threads),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    eps = .Machine$double.eps,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- mmds_init(
+        cost,
+        X,
+        max_iter,
+        eps,
+        verbose,
+        ret_extra,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )
+      cost$R2 <- cost$R * cost$R
+      cost$R <- NULL
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$pcost <- colSums((cost$R2 - cost$D2)^2)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$G <- k2g(Y, -8 * (cost$R2 - cost$D2))
+      cost
+    },
+    export = function(cost, val) {
+      res <- NULL
+      switch(val,
+        dx = {
+          res <- cost$R2
+          res[res < 0] <- 0
+          sqrt(res)
+        },
+        dy = {
+          res <- cost$D2
+          res[res < 0] <- 0
+          sqrt(res)
+        }
+      )
+      res
+    },
+    update = function(cost, Y) {
+      cost$D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      cost
+    },
+    sentinel = "D2"
+  )
+}
+
+sammon <- function(eps = .Machine$double.eps,
+                   use_cpp = FALSE,
+                   n_threads = 1) {
+  lreplace(
+    mmds(
+      eps = eps,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- mmds_init(
+        cost,
+        X,
+        max_iter,
+        eps,
+        verbose,
+        ret_extra,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )
+      cost$rsum_inv <- 1 / sum(cost$R)
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$pcost <- colSums((cost$R - cost$D)^2 / (cost$R + cost$eps)) * cost$rsum_inv
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$G <- k2g(Y, -4 * cost$rsum_inv * (cost$R - cost$D) / (cost$R * cost$D + cost$eps))
+      cost
+    }
+  )
+}
+
+
+gmmds <- function(k,
+                  eps = .Machine$double.eps,
+                  use_cpp = FALSE,
+                  n_threads = 0) {
+  lreplace(
+    mmds(use_cpp = use_cpp, n_threads = n_threads),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost$R <- geodesic(X,
+        k,
+        n_threads = n_threads,
+        use_cpp = use_cpp,
+        verbose = verbose
+      )
+      cost$eps <- eps
+      cost
+    },
+    export = function(cost, val) {
+      res <- NULL
+      switch(val,
+        geo = {
+          res <- cost$R
+        },
+        dy = {
+          res <- cost$D
+        }
+      )
+      res
+    }
+  )
+}
+
+# Define neighborhoods using a radius based on a fraction (f) of all input
+# distances (sorted by increasing length), don't correct non-neighborhood
+# distances unless they are smaller than the input distance
+ballmmds <- function(f = 0.1,
+                     eps = .Machine$double.eps,
+                     use_cpp = FALSE,
+                     n_threads = 1) {
+  lreplace(
+    mmds(use_cpp = use_cpp, n_threads = n_threads),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- mmds_init(
+        cost = cost,
+        X = X,
+        max_iter = max_iter,
+        eps = eps,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )
+      rs <- cost$R[upper.tri(cost$R)]
+      rmax <- Rfast::nth(rs, max(1, round(f * length(rs))))
+      if (verbose) {
+        tsmessage("f = ", formatC(f), " rmax = ", formatC(rmax))
+      }
+      cost$rmax <- rmax
+
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      R <- cost$R
+      D <- cost$D
+      rmax <- cost$rmax
+      Ddiff <- R - D
+      Ddiff[R > rmax & D > R] <- 0
+      cost$pcost <- colSums(Ddiff * Ddiff)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      eps <- cost$eps
+      R <- cost$R
+      D <- cost$D
+
+      K <- -4 * (R - D) / (D + eps)
+
+      rmax <- cost$rmax
+      K[R > rmax & D > R] <- 0
+
+      cost$G <- k2g(Y, K)
+      cost
+    },
+    export = function(cost, val) {
+      res <- NULL
+      switch(val,
+        geo = {
+          res <- cost$R
+        },
+        dy = {
+          res <- cost$D
+        }
+      )
+      res
+    },
+    update = function(cost, Y) {
+      cost$D <- calc_d(Y)
+      cost
+    }
+  )
+}
+
+# Create the symmetrized knn graph, don't correct non-neighborhood distances
+# unless they are smaller than the input distance
+knnmmds <- function(k,
+                    eps = .Machine$double.eps,
+                    use_cpp = FALSE,
+                    n_threads = 0) {
+  lreplace(
+    mmds(use_cpp = use_cpp, n_threads = n_threads),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- mmds_init(
+        cost = cost,
+        X = X,
+        max_iter = max_iter,
+        eps = eps,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )
+      knn <- knn_graph(
+        X = X,
+        k = k,
+        n_threads = n_threads,
+        verbose = verbose
+      )
+      # symmetrize
+      cost$knn <- pmax(knn, t(knn))
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      R <- cost$R
+      D <- cost$D
+      knn <- cost$knn
+      Ddiff <- R - D
+      Ddiff[knn == 0 & D > R] <- 0
+      cost$pcost <- colSums(Ddiff * Ddiff)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      eps <- cost$eps
+      R <- cost$R
+      D <- cost$D
+
+      K <- -4 * (R - D) / (D + eps)
+
+      knn <- cost$knn
+      K[knn == 0 & D > R] <- 0
+
+      cost$G <- k2g(Y, K)
+      cost$D <- D
+      cost
+    },
+    export = function(cost, val) {
+      res <- NULL
+      switch(val,
+        geo = {
+          res <- cost$R
+        },
+        dy = {
+          res <- cost$D
+        }
+      )
+      res
+    },
+    update = function(cost, Y) {
+      cost$D <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads)
+      cost
+    }
+  )
+}
+
+# Given data X and k nearest neighbors, return a geodesic distance matrix
+# Disconnections are treated by using the Euclidean distance. 
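
# To see that fallback in action: a disconnected point leaves Inf entries
# after Floyd-Warshall, which are then patched from the plain Euclidean
# matrix (standalone sketch; assumes Rfast, and the Euclidean values here
# are made-up toy numbers):
R <- matrix(Inf, nrow = 3, ncol = 3)
diag(R) <- 0
R[1, 2] <- R[2, 1] <- 1 # point 3 has no neighbors
G <- Rfast::floyd(R)
E <- matrix(c(0, 1, 4, 1, 0, 3, 4, 3, 0), nrow = 3) # toy Euclidean fallback
G[is.infinite(G)] <- E[is.infinite(G)]
G[1, 3] # 4: filled from the Euclidean matrix
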
+geodesic <- function(X, + k, + fill = TRUE, + use_cpp = FALSE, + n_threads = 0, + verbose = FALSE) { + tsmessage("Calculating geodesic distances with k = ", k) + + R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose) + # The hard work is done by Rfast's implementation of Floyd's algorithm + G <- Rfast::floyd(R) + if (any(is.infinite(G)) && fill) { + tsmessage( + "k = ", + k, + " resulted in disconnections: filling with Euclidean distances" + ) + if (methods::is(X, "dist")) { + R <- as.matrix(X) + } else { + R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) + } + G[is.infinite(G)] <- R[is.infinite(G)] + } + G +} diff --git a/smallvis/R/perplexity.R b/smallvis/R/perplexity.R index 3e76e31..32cc502 100644 --- a/smallvis/R/perplexity.R +++ b/smallvis/R/perplexity.R @@ -264,34 +264,7 @@ knn_graph <- function(X, k, n_threads, verbose) { D } -# Given data X and k nearest neighbors, return a geodisic distance matrix -# Disconnections are treated by using the Euclidean distance. -geodesic <- function(X, - k, - fill = TRUE, - use_cpp = FALSE, - n_threads = 0, - verbose = FALSE) { - tsmessage("Calculating geodesic distances with k = ", k) - - R <- knn_dist(X, k, n_threads = n_threads, verbose = verbose) - # The hard work is done by Rfast's implementation of Floyd's algorithm - G <- Rfast::floyd(R) - if (any(is.infinite(G)) && fill) { - tsmessage( - "k = ", - k, - " resulted in disconnections: filling with Euclidean distances" - ) - if (methods::is(X, "dist")) { - R <- as.matrix(X) - } else { - R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) - } - G[is.infinite(G)] <- R[is.infinite(G)] - } - G -} + # Multiscale perplexities: P is an average over the results of multiple # perplexities From f060ce385c319542e6ae7dabd20c15ae8aeaa704 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 11:53:31 -0700 Subject: [PATCH 16/26] Split cost into separate method files --- smallvis/R/absne.R | 607 ++++++++++++++++++++ smallvis/R/cost.R | 1269 +---------------------------------------- smallvis/R/ee.R | 85 +++ smallvis/R/fdiv.R | 310 ++++++++++ smallvis/R/gsne.R | 92 +++ smallvis/R/jse.R | 206 +++++++ smallvis/R/largevis.R | 71 +++ smallvis/R/nerv.R | 281 +++++++++ 8 files changed, 1672 insertions(+), 1249 deletions(-) create mode 100644 smallvis/R/absne.R create mode 100644 smallvis/R/ee.R create mode 100644 smallvis/R/fdiv.R create mode 100644 smallvis/R/gsne.R create mode 100644 smallvis/R/jse.R create mode 100644 smallvis/R/largevis.R create mode 100644 smallvis/R/nerv.R diff --git a/smallvis/R/absne.R b/smallvis/R/absne.R new file mode 100644 index 0000000..93ce6d4 --- /dev/null +++ b/smallvis/R/absne.R @@ -0,0 +1,607 @@ +# ABSNE ------------------------------------------------------------------- + +# alpha-beta divergence +absne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + alpha = 1, + lambda = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + beta <- lambda - alpha + + eps0 <- 1e-5 + if (abs(alpha) > eps0 && abs(lambda) < eps0) { + # alpha != 0, beta = -alpha (=> lambda == 0) + return( + absneamb( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + alpha = alpha, + eps = eps + ) + ) + } + if (abs(alpha) > eps0 && abs(beta) < eps0) { + # alpha != 0, beta = 0 (=> lambda = alpha) + return( + absneb0( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + alpha = alpha, + eps = eps + ) + ) + } + if (abs(alpha) < eps0 && abs(beta) > eps0) { + # alpha = 0, beta 
!= 0 (=> lambda = beta) + return( + absnea0( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + beta = beta, + eps = eps + ) + ) + } + if (abs(alpha) < eps0 && abs(beta) < eps0) { + # alpha = 0, beta = 0 (=> lambda = 0) + return( + absne00( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps + ) + ) + } + + if (abs(lambda) < eps0) { + lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 + } + lreplace( + tsne( + perplexity = perplexity, + use_cpp = TRUE, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage( + "Using ABSNE with alpha = ", + formatC(alpha), + " beta = ", + formatC(beta) + ) + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = TRUE + ) + cost$inva4 <- 4 / alpha + cost$minvab <- -1 / (alpha * beta) + cost$inval <- 1 / (alpha * lambda) + + cost$eps <- eps + cost + }, + cache_input = function(cost) { + P <- cost$P + eps <- cost$eps + cost$Pa <- powm(P, alpha, eps) + cost$Plc <- colSums(powm(P, lambda, eps)) / (beta * lambda) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$pcost <- cost$minvab * cost$PaQbc + cost$Plc + cost$inval * cost$Qlc + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + cost$G <- k2g(Y, cost$inva4 * cost$Z * Q * + (cost$PaQb - cost$Ql + Q * (cost$Qls - cost$PaQbs))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + Q <- W / Z + + eps <- cost$eps + cost$PaQb <- cost$Pa * powm(Q, beta, eps) + cost$PaQbc <- colSums(cost$PaQb) + cost$PaQbs <- sum(cost$PaQbc) + + cost$Q <- Q + cost$Ql <- powm(Q, lambda, eps) + cost$Qlc <- colSums(cost$Ql) + cost$Qls <- sum(cost$Qlc) + + cost$Z <- Z + cost + } + ) +} + +# alpha != 0, beta = 0 => lambda = alpha +absneb0 <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + alpha = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage("Using ABSNE with alpha = ", formatC(alpha), " beta = 0") + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + + cost$eps <- eps + + cost$inva2 <- 1 / (alpha * alpha) + cost$invam4 <- 4 / alpha + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + Pa <- powm(P, alpha, eps) + cost$Pa <- Pa + cost$PlPac <- colSums(P * logm(Pa, eps)) + cost$Pac <- colSums(Pa) + cost$Pas <- sum(cost$Pac) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$pcost <- cost$inva2 * + (cost$PlPac - colSums(cost$Pa * logm(cost$Qa, cost$eps)) - + cost$Pac + cost$Qac) + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + + cost$G <- k2g(Y, cost$invam4 * Q * cost$Z * + (cost$Pa - 
cost$Qa + Q * (cost$Qas - cost$Pas))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + Q <- W / Z + + cost$Q <- Q + cost$Z <- Z + + Qa <- powm(Q, alpha, cost$eps) + cost$Q <- Q + cost$Qa <- Qa + cost$Qac <- colSums(Qa) + cost$Qas <- sum(cost$Qac) + + cost + } + ) +} + +# alpha = -beta != 0 => lambda = 0 +absneamb <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + alpha = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage( + "Using ABSNE with alpha = ", + formatC(alpha), + " beta = -", + formatC(alpha) + ) + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + + cost$N <- nrow(cost$P) + cost$N2 <- (cost$N - 1) * cost$N + cost$eps <- eps + + cost$inva2 <- 1 / (alpha * alpha) + cost$invam4 <- 4 / alpha + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + Pa <- powm(P, alpha, eps) + cost$Pa <- Pa + Pa[Pa < eps] <- eps + cost$lPac <- colSums(logm(Pa, eps)) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$pcost <- cost$inva2 * (cost$lQac - cost$lPac + cost$PadivQac - cost$N) + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + + cost$G <- k2g(Y, cost$invam4 * Q * cost$Z * + (cost$PadivQa - 1 + Q * (cost$N2 - cost$PadivQas))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + Q <- W / Z + + cost$Q <- Q + cost$Z <- Z + + Qa <- powm(Q, alpha, cost$eps) + cost$lQac <- colSums(logm(Qa, cost$eps)) + cost$PadivQa <- divm(cost$Pa, Qa) + cost$PadivQac <- colSums(cost$PadivQa) + cost$PadivQas <- sum(cost$PadivQac) + + cost + } + ) +} + +# alpha = 0, beta != 0 => lambda = beta +absnea0 <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + beta = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage("Using ABSNE with alpha = 0, beta = ", formatC(beta)) + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + + cost$eps <- eps + + cost$invb2 <- 1 / (beta * beta) + cost$invbm4 <- 4 / beta + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + Pb <- powm(P, beta, eps) + Pb[Pb < eps] <- eps + cost$Pbc <- colSums(Pb) + cost$lPb <- logm(Pb, eps) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + Qb <- cost$Qb + cost$pcost <- cost$invb2 * + (cost$QblQbc - cost$QblPbc + cost$Pbc - cost$Qbc) + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, 
Y) + Q <- cost$Q + + cost$G <- k2g(Y, cost$invbm4 * Q * cost$Z * + (cost$QblPb - cost$QblQb + Q * (cost$QblQbs - cost$QblPbs))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + Q <- W / Z + + cost$Q <- Q + cost$Z <- Z + + Qb <- powm(Q, beta, cost$eps) + cost$Qbc <- colSums(Qb) + + cost$QblPb <- Qb * cost$lPb + cost$QblPbc <- colSums(cost$QblPb) + cost$QblPbs <- sum(cost$QblPb) + + cost$QblQb <- Qb * logm(Qb, cost$eps) + cost$QblQbc <- colSums(cost$QblQb) + cost$QblQbs <- sum(cost$QblQb) + + cost + } + ) +} + +# alpha = 0, beta = 0 => lambda = 0 +absne00 <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage("Using ABSNE with alpha = 0, beta = 0") + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + lP <- logm(P, eps) + cost$lP <- lP + cost$lPs <- sum(lP) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + pcost <- cost$lP - cost$lQ + cost$pcost <- 0.5 * colSums(pcost * pcost) + + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + + cost$G <- k2g(Y, 4 * Q * cost$Z * + (cost$lP - cost$lQ + Q * (cost$lQs - cost$lPs))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + Q <- W / Z + + cost$Q <- Q + cost$Z <- Z + + lQ <- logm(Q, cost$eps) + cost$lQ <- lQ + cost$lQs <- sum(lQ) + + cost + } + ) +} + +# alpha-beta divergence +abssne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + alpha = 1, + lambda = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + beta <- lambda - alpha + + eps0 <- 1e-5 + + if (abs(alpha) < eps0) { + alpha <- ifelse(alpha == 0, 1, sign(alpha)) * eps0 + } + if (abs(beta) < eps0) { + beta <- ifelse(beta == 0, 1, sign(beta)) * eps0 + } + if (abs(lambda) < eps0) { + lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 + } + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + if (verbose) { + tsmessage( + "Using ABSSNE with alpha = ", + formatC(alpha), + " beta = ", + formatC(beta) + ) + } + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$inva4 <- 4 / alpha + cost$minvab <- -1 / (alpha * beta) + cost$inval <- 1 / (alpha * lambda) + cost$ibl <- 1 / (beta * lambda) + + cost$eps <- eps + cost + }, + cache_input = function(cost) { + P <- cost$P + eps <- cost$eps + P[P < eps] <- eps + cost$Pa <- powm(P, alpha, eps) + cost$Plc <- colSums(powm(P, lambda, 
eps)) * cost$ibl + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$pcost <- cost$minvab * cost$PaQbc + cost$Plc + cost$inval * cost$Qlc + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$G <- k2g(Y, cost$inva4 * + (cost$PaQb - cost$Ql + cost$Q * (cost$Qls - cost$PaQbs))) + cost + }, + update = function(cost, Y) { + eps <- cost$eps + + Q <- expQ( + Y, + cost$eps, + is_symmetric = TRUE, + matrix_normalize = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q + + cost$PaQb <- cost$Pa * powm(Q, beta, eps) + cost$PaQbc <- colSums(cost$PaQb) + cost$PaQbs <- sum(cost$PaQbc) + + cost$Q <- Q + cost$Ql <- powm(Q, lambda, eps) + cost$Qlc <- colSums(cost$Ql) + cost$Qls <- sum(cost$Qlc) + + cost + } + ) +} diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index d929468..ec18e95 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -1,4 +1,4 @@ -# Generic Functions ------------------------------------------------------- +# Generic Weight/Cost/Gradient functions ---------------------------------- logm <- function(m, eps = .Machine$double.eps) { diag(m) <- eps @@ -16,7 +16,7 @@ divm <- function(m, n, eps = .Machine$double.eps) { powm <- function(m, n, eps = .Machine$double.eps) { diag(m) <- eps - m <- m ^ n + m <- m^n diag(m) <- 0 m } @@ -29,10 +29,19 @@ k2g <- function(Y, K, symmetrize = FALSE) { Y * colSums(K) - (K %*% Y) } -cost_init <- function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { +cost_init <- function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { if (!is.null(cost$init)) { - cost <- cost$init(cost, X, verbose = verbose, ret_extra = ret_extra, - max_iter = max_iter) + cost <- cost$init( + cost, + X, + verbose = verbose, + ret_extra = ret_extra, + max_iter = max_iter + ) } cost_cache_input(cost) } @@ -51,8 +60,7 @@ cost_point <- function(cost, Y) { cost_clear <- function(cost) { if (!is.null(cost$sentinel)) { cost[[cost$sentinel]] <- NULL - } - else if (is.null(cost$clear)) { + } else if (is.null(cost$clear)) { cost <- cost$clear(cost) } cost @@ -65,8 +73,7 @@ cost_update <- function(cost, Y) { if (is.null(cost[[cost$sentinel]])) { cost <- cost$update(cost, Y) } - } - else { + } else { cost <- cost$update(cost, Y) } cost @@ -78,15 +85,11 @@ cost_eval <- function(cost, Y, opt_res = NULL) { cost <- cost_point(cost, Y) pcosts <- cost$pcost cost_val <- sum(pcosts) - } - else { + } else { cost_val <- opt_res$f } - list( - cost = cost, - value = cost_val - ) + list(cost = cost, value = cost_val) } # Default export of values associated with a method @@ -96,8 +99,7 @@ cost_export <- function(cost, val) { res <- NULL if (!is.null(cost[[val]])) { res <- cost[[val]] - } - else if (!is.null(cost[[toupper(val)]])) { + } else if (!is.null(cost[[toupper(val)]])) { res <- cost[[toupper(val)]] } res @@ -107,7 +109,7 @@ cost_cache_input <- function(cost) { if (!is.null(cost$cache_input)) { cost <- cost$cache_input(cost) } - cost + cost } start_exaggerating <- function(cost, exaggeration_factor) { @@ -123,1234 +125,3 @@ stop_exaggerating <- function(cost, exaggeration_factor) { } cost_cache_input(cost) } - -# LargeVis ---------------------------------------------------------------- - -largevis <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", gamma = 1, gr_eps = 0.1, - normalize = TRUE, eps = 1e-9, row_weight = NULL, - use_cpp = FALSE, n_threads = 0) { - if (!is.null(row_weight)) { - row_normalize <- row_weight - } - else { - row_normalize <- TRUE - } - lreplace(tsne(perplexity, use_cpp = 
use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X = X, perplexity = perplexity, - symmetrize = symmetrize, kernel = inp_kernel, - normalize = normalize, verbose = verbose, - row_normalize = row_normalize, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost$greps1 <- gr_eps - 1 - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - eps <- cost$eps - W <- cost$W - - cost$pcost <- colSums(-P * logm(W, eps) - gamma * log1p(-W + eps)) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - W <- cost$W - cost$G <- k2g(Y, 4 * W * (cost$P - ((gamma * W) / (1 + cost$greps1 * W)))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - cost$W <- W - cost - }, - export = cost_export - ) -} - - -# f-Divergences ----------------------------------------------------------- - -# Reverse KL divergence -rklsne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps - P[P < eps] <- eps - cost$lP <- logm(P, eps) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- cost$QlQPcs - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - cost$G <- k2g(Y, 4 * Q * Q * cost$sumW * (sum(cost$QlQPcs) - cost$lQP)) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - sumW <- sum(W) - Q <- W / sumW - - cost$lQP <- logm(Q, cost$eps) - cost$lP - cost$QlQPcs <- colSums(Q * cost$lQP) - cost$Q <- Q - cost$sumW <- sumW - - cost - } - ) -} - -# Jensen-Shannon divergence -jssne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps - P[P < eps] <- eps - cost$PlP <- colSums(P * logm(P, eps)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- 0.5 * (cost$PlP + cost$QlQZ - colSums(cost$P * logm(cost$Z, cost$eps))) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - cost$G <- k2g(Y, 2 * Q * Q * cost$sumW * (cost$QlQZs - cost$lQZ)) - cost - }, - 
update = function(cost, Y) { - eps <- cost$eps - P <- cost$P - - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - sumW <- sum(W) - - Q <- W / sumW - Z <- 0.5 * (P + Q) - - QlQZ <- logm(Q / Z, eps) - cost$QlQZ <- colSums(Q * QlQZ) - cost$QlQZs <- sum(cost$QlQZ) - - cost$lQZ <- QlQZ - cost$sumW <- sumW - cost$Q <- Q - cost$Z <- Z - - cost - } - ) -} - -# Chi-squared divergence -chsne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - cost$P2 <- P * P - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - PQ <- cost$P - cost$Q - - cost$pcost <- colSums(PQ * PQ * cost$invQ) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - Q <- cost$Q - Z <- cost$Z - invQ <- cost$invQ - - P2Q <- cost$P2 * invQ - - cost$G <- k2g(Y, 4 * Q * Q * Z * (P2Q * invQ - sum(P2Q))) - cost - }, - update = function(cost, Y) { - P <- cost$P - - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - invQ <- 1 / Q - diag(invQ) <- 0 - cost$invQ <- invQ - - cost - } - ) -} - -# Hellinger distance divergence -hlsne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp == use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - cost$sP <- sqrt(cost$P) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - PQ <- cost$sP - cost$sQ - - cost$pcost <- colSums(PQ * PQ) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - sP <- cost$sP - Q <- cost$Q - Z <- cost$Z - sQ <- cost$sQ - - sPQ <- sum(sP * sQ) - PQ <- sP / sQ - diag(PQ) <- 0 - - cost$G <- k2g(Y, 4 * Q * Q * Z * (PQ - sPQ)) - cost - }, - update = function(cost, Y) { - P <- cost$P - - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - cost$sQ <- sqrt(Q) - - cost - } - ) -} - -# ABSNE ------------------------------------------------------------------- - -# alpha-beta divergence -absne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", alpha = 1, lambda = 1, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - beta <- lambda - alpha - - eps0 <- 1e-5 - if (abs(alpha) > eps0 && abs(lambda) < eps0) { - # alpha != 0, beta = -alpha (=> lambda == 0) - 
return(absneamb(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, alpha = alpha, eps = eps)) - } - if (abs(alpha) > eps0 && abs(beta) < eps0) { - # alpha != 0, beta = 0 (=> lambda = alpha) - return(absneb0(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, alpha = alpha, eps = eps)) - } - if (abs(alpha) < eps0 && abs(beta) > eps0 ) { - # alpha = 0, beta != 0 (=> lambda = beta) - return(absnea0(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, beta = beta, eps = eps)) - } - if (abs(alpha) < eps0 && abs(beta) < eps0) { - # alpha = 0, beta = 0 (=> lambda = 0) - return(absne00(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps)) - } - - if (abs(lambda) < eps0) { - lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 - } - lreplace( - tsne(perplexity = perplexity, use_cpp = TRUE, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSNE with alpha = ", formatC(alpha), - " beta = ", formatC(beta)) - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = TRUE) - cost$inva4 <- 4 / alpha - cost$minvab <- -1 / (alpha * beta) - cost$inval <- 1 / (alpha * lambda) - - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps - cost$Pa <- powm(P, alpha, eps) - cost$Plc <- colSums(powm(P, lambda, eps)) / (beta * lambda) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- cost$minvab * cost$PaQbc + cost$Plc + cost$inval * cost$Qlc - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - cost$G <- k2g(Y, cost$inva4 * cost$Z * Q * - (cost$PaQb - cost$Ql + Q * (cost$Qls - cost$PaQbs))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - Q <- W / Z - - eps <- cost$eps - cost$PaQb <- cost$Pa * powm(Q, beta, eps) - cost$PaQbc <- colSums(cost$PaQb) - cost$PaQbs <- sum(cost$PaQbc) - - cost$Q <- Q - cost$Ql <- powm(Q, lambda, eps) - cost$Qlc <- colSums(cost$Ql) - cost$Qls <- sum(cost$Qlc) - - cost$Z <- Z - cost - } - ) -} - -# alpha != 0, beta = 0 => lambda = alpha -absneb0 <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", alpha = 1, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSNE with alpha = ", formatC(alpha), - " beta = 0") - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - - cost$eps <- eps - - cost$inva2 <- 1 / (alpha * alpha) - cost$invam4 <- 4 / alpha - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - Pa <- powm(P, alpha, eps) - cost$Pa <- Pa - cost$PlPac <- colSums(P * logm(Pa, eps)) - cost$Pac <- colSums(Pa) - cost$Pas <- sum(cost$Pac) - cost - }, - pfn = function(cost, Y) { - cost 
<- cost_update(cost, Y) - cost$pcost <- cost$inva2 * - (cost$PlPac - colSums(cost$Pa * logm(cost$Qa, cost$eps)) - - cost$Pac + cost$Qac) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - cost$G <- k2g(Y, cost$invam4 * Q * cost$Z * - (cost$Pa - cost$Qa + Q * (cost$Qas - cost$Pas))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - - Qa <- powm(Q, alpha, cost$eps) - cost$Q <- Q - cost$Qa <- Qa - cost$Qac <- colSums(Qa) - cost$Qas <- sum(cost$Qac) - - cost - } - ) -} - -# alpha = -beta != 0 => lambda = 0 -absneamb <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", alpha = 1, - eps = .Machine$double.eps, n_threads = 0, - use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSNE with alpha = ", formatC(alpha), - " beta = -", formatC(alpha)) - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - - cost$N <- nrow(cost$P) - cost$N2 <- (cost$N - 1) * cost$N - cost$eps <- eps - - cost$inva2 <- 1 / (alpha * alpha) - cost$invam4 <- 4 / alpha - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - Pa <- powm(P, alpha, eps) - cost$Pa <- Pa - Pa[Pa < eps] <- eps - cost$lPac <- colSums(logm(Pa, eps)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- cost$inva2 * (cost$lQac - cost$lPac + cost$PadivQac - cost$N) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - cost$G <- k2g(Y, cost$invam4 * Q * cost$Z * - (cost$PadivQa - 1 + Q * (cost$N2 - cost$PadivQas))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - - Qa <- powm(Q, alpha, cost$eps) - cost$lQac <- colSums(logm(Qa, cost$eps)) - cost$PadivQa <- divm(cost$Pa, Qa) - cost$PadivQac <- colSums(cost$PadivQa) - cost$PadivQas <- sum(cost$PadivQac) - - cost - } - ) -} - -# alpha = 0, beta != 0 => lambda = beta -absnea0 <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", beta = 1, - eps = .Machine$double.eps, n_threads = 0, - use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSNE with alpha = 0, beta = ", formatC(beta)) - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - - cost$eps <- eps - - cost$invb2 <- 1 / (beta * beta) - cost$invbm4 <- 4 / beta - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - Pb <- powm(P, beta, eps) - Pb[Pb < eps] <- eps - cost$Pbc <- colSums(Pb) - cost$lPb <- logm(Pb, eps) - cost - }, - pfn = function(cost, Y) { - cost 
<- cost_update(cost, Y) - Qb <- cost$Qb - cost$pcost <- cost$invb2 * - (cost$QblQbc - cost$QblPbc + cost$Pbc - cost$Qbc) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - cost$G <- k2g(Y, cost$invbm4 * Q * cost$Z * - (cost$QblPb - cost$QblQb + Q * (cost$QblQbs - cost$QblPbs))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - - Qb <- powm(Q, beta, cost$eps) - cost$Qbc <- colSums(Qb) - - cost$QblPb <- Qb * cost$lPb - cost$QblPbc <- colSums(cost$QblPb) - cost$QblPbs <- sum(cost$QblPb) - - cost$QblQb <- Qb * logm(Qb, cost$eps) - cost$QblQbc <- colSums(cost$QblQb) - cost$QblQbs <- sum(cost$QblQb) - - cost - } - ) -} - -# alpha = 0, beta = 0 => lambda = 0 -absne00 <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSNE with alpha = 0, beta = 0") - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - lP <- logm(P, eps) - cost$lP <- lP - cost$lPs <- sum(lP) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - pcost <- cost$lP - cost$lQ - cost$pcost <- 0.5 * colSums(pcost * pcost) - - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - cost$G <- k2g(Y, 4 * Q * cost$Z * - (cost$lP - cost$lQ + Q * (cost$lQs - cost$lPs))) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - Z <- sum(W) - Q <- W / Z - - cost$Q <- Q - cost$Z <- Z - - lQ <- logm(Q, cost$eps) - cost$lQ <- lQ - cost$lQs <- sum(lQ) - - cost - } - ) -} - -# alpha-beta divergence -abssne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", alpha = 1, lambda = 1, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - beta <- lambda - alpha - - eps0 <- 1e-5 - - if (abs(alpha) < eps0) { - alpha <- ifelse(alpha == 0, 1, sign(alpha)) * eps0 - } - if (abs(beta) < eps0) { - beta <- ifelse(beta == 0, 1, sign(beta)) * eps0 - } - if (abs(lambda) < eps0) { - lambda <- ifelse(lambda == 0, 1, sign(lambda)) * eps0 - } - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (verbose) { - tsmessage("Using ABSSNE with alpha = ", formatC(alpha), - " beta = ", formatC(beta)) - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$inva4 <- 4 / alpha - cost$minvab <- -1 / (alpha * beta) - cost$inval <- 1 / (alpha * lambda) - cost$ibl <- 1 / (beta * lambda) - - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps 
- P[P < eps] <- eps - cost$Pa <- powm(P, alpha, eps) - cost$Plc <- colSums(powm(P, lambda, eps)) * cost$ibl - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$pcost <- cost$minvab * cost$PaQbc + cost$Plc + cost$inval * cost$Qlc - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, cost$inva4 * - (cost$PaQb - cost$Ql + cost$Q * (cost$Qls - cost$PaQbs))) - cost - }, - update = function(cost, Y) { - eps <- cost$eps - - Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q - - cost$PaQb <- cost$Pa * powm(Q, beta, eps) - cost$PaQbc <- colSums(cost$PaQb) - cost$PaQbs <- sum(cost$PaQbc) - - cost$Q <- Q - cost$Ql <- powm(Q, lambda, eps) - cost$Qlc <- colSums(cost$Ql) - cost$Qls <- sum(cost$Qlc) - - cost - } - ) -} - -# Other Divergences ------------------------------------------------------- - -# global-SNE -gsne <- function(perplexity, lambda = 1, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - use_cpp = FALSE, n_threads = 0) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - - # Phat isn't affected by early exaggeration, so we cache it once only - if (methods::is(X, "dist")) { - Phat <- as.matrix(X) - } - else { - Phat <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) - } - - Phat <- Phat + 1 - diag(Phat) <- 0 - - Phat <- Phat / sum(Phat) - cost$Phat <- Phat - cost$phlogph <- colSums(Phat * logm(Phat, eps)) - - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - cost$plogp <- colSums(P * logm(P, eps)) - - cost$plamphat <- P - lambda * cost$Phat - - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - eps <- cost$eps - kl <- cost$plogp - colSums(cost$P * logm(cost$W / cost$Z, eps)) - klhat <- cost$phlogph - colSums(cost$Phat * logm(cost$What / cost$Zhat, eps)) - cost$pcost <- kl + lambda * klhat - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - qlampqhat <- (cost$W / cost$Z) - lambda * (cost$What / cost$Zhat) - cost$G <- k2g(Y, 4 * cost$W * (cost$plamphat - qlampqhat)) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - What <- 1 + W - W <- 1 / What - diag(W) <- 0 - cost$Z <- sum(W) - cost$W <- W - - diag(What) <- 0 - cost$What <- What - cost$Zhat <- sum(What) - - cost - } - ) -} - -# Elastic Embedding ------------------------------------------------------- - -# Carreira-Perpinán, M. A. (2010, June). -# The Elastic Embedding Algorithm for Dimensionality Reduction. -# In \emph{Proceedings of the 27th International Conference on Machine Learning (ICML-10)} (pp. 167-174). -# http://faculty.ucmerced.edu/mcarreira-perpinan/papers/icml10.pdf (PDF) -# lambda control the strength of repulsive vs attractive forces -# if neg_weights is true, the repulsive contribution is weighted based on the -# squared input distances. Otherwise, no weighting is applied. 
-ee <- function(perplexity, lambda = 100, neg_weights = TRUE, - inp_kernel = "gaussian", symmetrize = "symmetric", - eps = .Machine$double.eps, use_cpp = FALSE, n_threads = 0) { - list( - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - if (neg_weights) { - if (methods::is(X, "dist")) { - R <- X - } - else { - R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) - } - cost$Vn <- R / sum(R) - } - else { - cost$Vn <- 1 - } - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - Vp <- cost$P - Vn <- cost$Vn - W <- cost$W - cost$pcost <- colSums(-Vp * logm(W, cost$eps) + lambda * (Vn * W)) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - Vp <- cost$P - Vn <- cost$Vn - cost$G <- k2g(Y, 4 * (Vp - lambda * Vn * cost$W)) - cost - }, - export = function(cost, val) { - res <- NULL - if (!is.null(cost[[val]])) { - res <- cost[[val]] - } - else if (!is.null(cost[[toupper(val)]])) { - res <- cost[[toupper(val)]] - } - res - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- exp(-W) - diag(W) <- 0 - cost$W <- W - cost - }, - sentinel = "W" - ) -} - - -# JSE and NeRV ------------------------------------------------------------ - -# Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010). -# Information retrieval perspective to nonlinear dimensionality reduction for -# data visualization. -# \emph{Journal of Machine Learning Research}, \emph{11}, 451-490. 
-# -# Unlike original publication, won't transfer input precisions to output kernel -# lambda = 1 gives ASNE results -# default lambda = 0.9 from "Majorization-Minimization for Manifold Embedding" -# Yang, Peltonen, Kaski 2015 -nerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lambda2 <- 2 * lambda - oml <- 1 - lambda - oml2 <- 2 * oml - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - cost$P <- P - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - - kl_fwd <- rowSums(P * cost$lPQ) - - cost$pcost <- lambda * kl_fwd + oml * cost$kl_rev - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - # Total K including multiplying by 2 in gradient - K <- lambda2 * (cost$P - Q) + oml2 * Q * (cost$lPQ + cost$kl_rev) - - cost$G <- k2g(Y, K, symmetrize = TRUE) - cost - }, - update = function(cost, Y) { - eps <- cost$eps - - Q <- expQ(Y, eps, is_symmetric = TRUE, use_cpp = use_cpp, - n_threads = n_threads)$Q - cost$Q <- Q - - # Reverse KL gradient - cost$lPQ <- logm(cost$P / Q, eps) - # for KLrev we want Q * log(Q/P), so take -ve of log(P/Q) - cost$kl_rev <- rowSums(Q * -cost$lPQ) - cost - }, - sentinel = "Q", - export = function(cost, val) { - res <- cost_export(cost, val) - res - } - ) -} - -snerv <- function(perplexity, lambda = 0.9, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lambda4 <- 4 * lambda - oml <- 1 - lambda - oml4 <- 4 * oml - lreplace( - ssne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads), - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - cost$P <- P - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - - kl_fwd <- colSums(P * cost$lPQ) - - cost$pcost <- lambda * kl_fwd + oml * cost$kl_rev - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - Q <- cost$Q - - # Total K including multiplying by 4 in gradient - K <- lambda4 * (cost$P - Q) + oml4 * Q * (cost$lPQ + cost$QlPQs) - - cost$G <- k2g(Y, K, symmetrize = FALSE) - cost - }, - update = function(cost, Y) { - eps <- cost$eps - - Q <- expQ(Y, eps, is_symmetric = TRUE, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q - cost$Q <- Q - - # Reverse KL gradient - cost$lPQ <- logm(cost$P / Q, eps) - cost$kl_rev <- colSums(Q * -cost$lPQ) - cost$QlPQs <- sum(cost$kl_rev) - cost - }, - export = function(cost, val) { - res <- cost_export(cost, val) - res - } - ) -} - -# Lee, J. A., Renard, E., Bernard, G., Dupont, P., & Verleysen, M. (2013). -# Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions in -# dimensionality reduction based on similarity preservation. -# \emph{Neurocomputing}, \emph{112}, 92-108. -# kappa = 0 behaves like ASNE -# kappa = 1 behaves like NeRV with lambda = 0. Yes that's confusing. 
-jse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - eps0 <- 1e-5 - kappa <- max(kappa, eps0) - kappa <- min(kappa, 1 - eps0) - - kappa_inv <- 1 / kappa - m2_kappa_inv <- -2 * kappa_inv - om_kappa <- 1 - kappa - om_kappa_inv <- 1 / om_kappa - - lreplace( - ssne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - cache_input = function(cost) { - eps <- cost$eps - P <- cost$P - P[P < eps] <- eps - cost$plogp <- rowSums(P * logm(P, eps)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - eps <- cost$eps - - cost$pcost <- - om_kappa_inv * (cost$plogp - rowSums(cost$P * logm(cost$Z, eps))) + - kappa_inv * cost$QlQZc - - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - K <- m2_kappa_inv * cost$Q * (cost$lQZ - cost$QlQZc) - cost$G <- k2g(Y, K, - symmetrize = TRUE) - - cost - }, - update = function(cost, Y) { - eps <- cost$eps - - Q <- expQ(Y, eps = eps, is_symmetric = TRUE, use_cpp = use_cpp, - n_threads = n_threads)$Q - - Z <- kappa * cost$P + om_kappa * Q - Z[Z < eps] <- eps - diag(Z) <- 0 - - cost$Q <- Q - cost$Z <- Z - cost$lQZ <- logm(Q / Z, eps) - cost$QlQZc <- rowSums(Q * cost$lQZ) - - cost - }, - export = cost_export - ) -} - - -sjse <- function(perplexity, kappa = 0.5, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - eps0 <- 1e-5 - kappa <- max(kappa, eps0) - kappa <- min(kappa, 1 - eps0) - - kappa_inv <- 1 / kappa - m4_kappa_inv <- -4 * kappa_inv - om_kappa <- 1 - kappa - om_kappa_inv <- 1 / om_kappa - - lreplace( - ssne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads, - use_cpp = use_cpp), - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - eps <- cost$eps - - cost$pcost <- - om_kappa_inv * (cost$plogp - colSums(cost$P * logm(cost$Z, eps))) + - kappa_inv * cost$QlQZc - - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - K <- m4_kappa_inv * cost$Q * (cost$lQZ - cost$QlQZs) - cost$G <- k2g(Y, K, symmetrize = FALSE) - - cost - }, - update = function(cost, Y) { - eps <- cost$eps - - Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q - Z <- kappa * cost$P + om_kappa * Q - Z[Z < eps] <- eps - diag(Z) <- 0 - - cost$Q <- Q - cost$Z <- Z - cost$lQZ <- logm(Q / Z, eps) - cost$QlQZc <- colSums(Q * cost$lQZ) - cost$QlQZs <- sum(cost$QlQZc) - - cost - }, - export = cost_export - ) -} - - -rsrnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace(nerv(perplexity = perplexity, lambda = lambda, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "symmetric", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - P <- cost$P - P <- P / rowSums(P) - cost$P <- P - - cost$eps <- eps - cost - } - ) -} - -rsrjse <- function(perplexity, kappa = 0.5, eps = .Machine$double.eps, - n_threads = 0, use_cpp = 
FALSE) {
-  lreplace(jse(perplexity = perplexity, kappa = kappa, use_cpp = use_cpp,
-               n_threads = n_threads),
-           init = function(cost, X, max_iter, verbose = FALSE,
-                           ret_extra = c()) {
-             cost <- sne_init(cost, X, perplexity = perplexity,
-                              symmetrize = "symmetric", normalize = FALSE,
-                              verbose = verbose, ret_extra = ret_extra,
-                              n_threads = n_threads, use_cpp = use_cpp)
-             P <- cost$P
-             P <- P / rowSums(P)
-             cost$P <- P
-
-             cost$eps <- eps
-             cost
-           }
-  )
-}
-
-# NeRV with input bandwidths transferred to the output kernel, as in the
-# original paper.
-bnerv <- function(perplexity, lambda = 0.9, eps = .Machine$double.eps,
-                  n_threads = 0, use_cpp = FALSE) {
-  lambda2 <- 2 * lambda
-  oml <- 1 - lambda
-  oml2 <- 2 * oml
-  lreplace(
-    nerv(perplexity = perplexity, lambda = lambda, use_cpp = use_cpp,
-         n_threads = n_threads),
-    init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) {
-      ret_extra <- unique(c(ret_extra, 'beta'))
-
-      cost <- sne_init(cost, X, perplexity = perplexity,
-                       symmetrize = "none", normalize = FALSE,
-                       verbose = verbose, ret_extra = ret_extra,
-                       n_threads = n_threads, use_cpp = use_cpp)
-      cost$eps <- eps
-      cost$lambda2b <- lambda2 * cost$beta
-      cost$oml2b <- oml2 * cost$beta
-      cost
-    },
-    gr = function(cost, Y) {
-      cost <- cost_update(cost, Y)
-      Q <- cost$Q
-
-      # Total K including multiplying by 2 * beta in gradient
-      K <- cost$lambda2b * (cost$P - Q) + cost$oml2b * Q * (cost$lPQ + cost$kl_rev)
-
-      cost$G <- k2g(Y, K, symmetrize = TRUE)
-      cost
-
-      cost
-    },
-    update = function(cost, Y) {
-      eps <- cost$eps
-
-      Q <- expQ(Y, eps, beta = cost$beta, is_symmetric = FALSE,
-                use_cpp = use_cpp, n_threads = n_threads)$Q
-      cost$Q <- Q
-
-      # Reverse KL gradient
-      cost$lPQ <- logm(cost$P / Q, eps)
-      cost$kl_rev <- rowSums(Q * -cost$lPQ)
-
-      cost
-    }
-  )
-}
diff --git a/smallvis/R/ee.R b/smallvis/R/ee.R
new file mode 100644
index 0000000..87be379
--- /dev/null
+++ b/smallvis/R/ee.R
@@ -0,0 +1,85 @@
+# Elastic Embedding -------------------------------------------------------
+
+# Carreira-Perpinán, M. A. (2010, June).
+# The Elastic Embedding Algorithm for Dimensionality Reduction.
+# In \emph{Proceedings of the 27th International Conference on Machine Learning (ICML-10)} (pp. 167-174).
+# http://faculty.ucmerced.edu/mcarreira-perpinan/papers/icml10.pdf (PDF)
+# lambda controls the strength of repulsive vs attractive forces
+# if neg_weights is true, the repulsive contribution is weighted based on the
+# squared input distances. Otherwise, no weighting is applied.
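+#
+# As an informal sketch (notation mine, not from the paper): with output
+# weights w_ij = exp(-d2_ij) as computed in update() below, the per-point
+# cost in pfn() amounts to
+#   sum_j ( -vp_ij * log(w_ij) + lambda * vn_ij * w_ij )
+# i.e. an attraction term over the positive (input) weights vp plus a
+# lambda-scaled repulsion term over the negative weights vn.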
+ee <- function(perplexity,
+               lambda = 100,
+               neg_weights = TRUE,
+               inp_kernel = "gaussian",
+               symmetrize = "symmetric",
+               eps = .Machine$double.eps,
+               use_cpp = FALSE,
+               n_threads = 0) {
+  list(
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      if (neg_weights) {
+        if (methods::is(X, "dist")) {
+          R <- X
+        } else {
+          R <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads)
+        }
+        cost$Vn <- R / sum(R)
+      } else {
+        cost$Vn <- 1
+      }
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = TRUE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      Vp <- cost$P
+      Vn <- cost$Vn
+      W <- cost$W
+      cost$pcost <- colSums(-Vp * logm(W, cost$eps) + lambda * (Vn * W))
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      Vp <- cost$P
+      Vn <- cost$Vn
+      cost$G <- k2g(Y, 4 * (Vp - lambda * Vn * cost$W))
+      cost
+    },
+    export = function(cost, val) {
+      res <- NULL
+      if (!is.null(cost[[val]])) {
+        res <- cost[[val]]
+      } else if (!is.null(cost[[toupper(val)]])) {
+        res <- cost[[toupper(val)]]
+      }
+      res
+    },
+    update = function(cost, Y) {
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- exp(-W)
+      diag(W) <- 0
+      cost$W <- W
+      cost
+    },
+    sentinel = "W"
+  )
+}
diff --git a/smallvis/R/fdiv.R b/smallvis/R/fdiv.R
new file mode 100644
index 0000000..3be1365
--- /dev/null
+++ b/smallvis/R/fdiv.R
@@ -0,0 +1,310 @@
+# f-Divergences -----------------------------------------------------------
+
+# Reverse KL divergence
+rklsne <- function(perplexity,
+                   inp_kernel = "gaussian",
+                   symmetrize = "symmetric",
+                   eps = .Machine$double.eps,
+                   n_threads = 0,
+                   use_cpp = FALSE) {
+  lreplace(
+    tsne(
+      perplexity = perplexity,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = TRUE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost
+    },
+    cache_input = function(cost) {
+      P <- cost$P
+      eps <- cost$eps
+      P[P < eps] <- eps
+      cost$lP <- logm(P, eps)
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$pcost <- cost$QlQPcs
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      Q <- cost$Q
+      cost$G <- k2g(Y, 4 * Q * Q * cost$sumW * (sum(cost$QlQPcs) - cost$lQP))
+      cost
+    },
+    update = function(cost, Y) {
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- 1 / (1 + W)
+      diag(W) <- 0
+      sumW <- sum(W)
+      Q <- W / sumW
+
+      cost$lQP <- logm(Q, cost$eps) - cost$lP
+      cost$QlQPcs <- colSums(Q * cost$lQP)
+      cost$Q <- Q
+      cost$sumW <- sumW
+
+      cost
+    }
+  )
+}
+
+# Jensen-Shannon divergence
+jssne <- function(perplexity,
+                  inp_kernel = "gaussian",
+                  symmetrize = "symmetric",
+                  eps = .Machine$double.eps,
+                  n_threads = 0,
+                  use_cpp = FALSE) {
+  lreplace(
+    tsne(
+      perplexity = perplexity,
+      inp_kernel = inp_kernel,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = TRUE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost
+    },
+    cache_input = function(cost) {
+      P <- cost$P
+      eps <- cost$eps
+      P[P < eps] <- eps
+      cost$PlP <- colSums(P * logm(P, eps))
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      cost$pcost <- 0.5 * (cost$PlP + cost$QlQZ - colSums(cost$P * logm(cost$Z, cost$eps)))
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      Q <- cost$Q
+
+      cost$G <- k2g(Y, 2 * Q * Q * cost$sumW * (cost$QlQZs - cost$lQZ))
+      cost
+    },
+    update = function(cost, Y) {
+      eps <- cost$eps
+      P <- cost$P
+
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- 1 / (1 + W)
+      diag(W) <- 0
+      sumW <- sum(W)
+
+      Q <- W / sumW
+      Z <- 0.5 * (P + Q)
+
+      QlQZ <- logm(Q / Z, eps)
+      cost$QlQZ <- colSums(Q * QlQZ)
+      cost$QlQZs <- sum(cost$QlQZ)
+
+      cost$lQZ <- QlQZ
+      cost$sumW <- sumW
+      cost$Q <- Q
+      cost$Z <- Z
+
+      cost
+    }
+  )
+}
+
+# Chi-squared divergence
+chsne <- function(perplexity,
+                  inp_kernel = "gaussian",
+                  symmetrize = "symmetric",
+                  eps = .Machine$double.eps,
+                  n_threads = 0,
+                  use_cpp = FALSE) {
+  lreplace(
+    tsne(
+      perplexity = perplexity,
+      inp_kernel = inp_kernel,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = symmetrize,
+        normalize = TRUE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost
+    },
+    cache_input = function(cost) {
+      P <- cost$P
+      cost$P2 <- P * P
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      PQ <- cost$P - cost$Q
+
+      cost$pcost <- colSums(PQ * PQ * cost$invQ)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      Q <- cost$Q
+      Z <- cost$Z
+      invQ <- cost$invQ
+
+      P2Q <- cost$P2 * invQ
+
+      cost$G <- k2g(Y, 4 * Q * Q * Z * (P2Q * invQ - sum(P2Q)))
+      cost
+    },
+    update = function(cost, Y) {
+      P <- cost$P
+
+      W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads)
+      W <- 1 / (1 + W)
+      diag(W) <- 0
+      Z <- sum(W)
+
+      Q <- W / Z
+
+      cost$Q <- Q
+      cost$Z <- Z
+      invQ <- 1 / Q
+      diag(invQ) <- 0
+      cost$invQ <- invQ
+
+      cost
+    }
+  )
+}
+
+# Hellinger distance divergence
+hlsne <- function(perplexity,
+                  inp_kernel = "gaussian",
+                  symmetrize = "symmetric",
+                  eps = .Machine$double.eps,
+                  n_threads = 0,
+                  use_cpp = FALSE) {
+  lreplace(
+    tsne(
+      perplexity = perplexity,
+      inp_kernel = inp_kernel,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options())
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        kernel = inp_kernel,
+        symmetrize = "symmetric",
+        normalize = TRUE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost
+    },
+    cache_input = function(cost) {
+      cost$sP <- sqrt(cost$P)
+      cost
+    },
+    pfn = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+
+      PQ <- cost$sP - cost$sQ
+
+      cost$pcost <- colSums(PQ * PQ)
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      sP <- cost$sP
+      Q <- cost$Q
+      Z <- cost$Z
sQ <- cost$sQ + + sPQ <- sum(sP * sQ) + PQ <- sP / sQ + diag(PQ) <- 0 + + cost$G <- k2g(Y, 4 * Q * Q * Z * (PQ - sPQ)) + cost + }, + update = function(cost, Y) { + P <- cost$P + + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + Z <- sum(W) + + Q <- W / Z + + cost$Q <- Q + cost$Z <- Z + cost$sQ <- sqrt(Q) + + cost + } + ) +} diff --git a/smallvis/R/gsne.R b/smallvis/R/gsne.R new file mode 100644 index 0000000..f1dbc87 --- /dev/null +++ b/smallvis/R/gsne.R @@ -0,0 +1,92 @@ +# global-SNE +gsne <- function(perplexity, + lambda = 1, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + use_cpp = FALSE, + n_threads = 0) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + + # Phat isn't affected by early exaggeration, so we cache it once only + if (methods::is(X, "dist")) { + Phat <- as.matrix(X) + } else { + Phat <- calc_d2(X, use_cpp = use_cpp, n_threads = n_threads) + } + + Phat <- Phat + 1 + diag(Phat) <- 0 + + Phat <- Phat / sum(Phat) + cost$Phat <- Phat + cost$phlogph <- colSums(Phat * logm(Phat, eps)) + + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + cost$plogp <- colSums(P * logm(P, eps)) + + cost$plamphat <- P - lambda * cost$Phat + + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + eps <- cost$eps + kl <- cost$plogp - colSums(cost$P * logm(cost$W / cost$Z, eps)) + klhat <- cost$phlogph - colSums(cost$Phat * logm(cost$What / cost$Zhat, eps)) + cost$pcost <- kl + lambda * klhat + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + + qlampqhat <- (cost$W / cost$Z) - lambda * (cost$What / cost$Zhat) + cost$G <- k2g(Y, 4 * cost$W * (cost$plamphat - qlampqhat)) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + What <- 1 + W + W <- 1 / What + diag(W) <- 0 + cost$Z <- sum(W) + cost$W <- W + + diag(What) <- 0 + cost$What <- What + cost$Zhat <- sum(What) + + cost + } + ) +} diff --git a/smallvis/R/jse.R b/smallvis/R/jse.R new file mode 100644 index 0000000..fbc84fd --- /dev/null +++ b/smallvis/R/jse.R @@ -0,0 +1,206 @@ +# JSE --------------------------------------------------------------------- + +# Lee, J. A., Renard, E., Bernard, G., Dupont, P., & Verleysen, M. (2013). +# Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions in +# dimensionality reduction based on similarity preservation. +# \emph{Neurocomputing}, \emph{112}, 92-108. +# kappa = 0 behaves like ASNE +# kappa = 1 behaves like NeRV with lambda = 0. Yes that's confusing. 
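+#
+# A gloss on the code below (my summary, not from the paper): update() forms
+# the mixture Z = kappa * P + (1 - kappa) * Q, and pfn() blends
+# KL(P||Z) / (1 - kappa) with KL(Q||Z) / kappa, which is why the kappa -> 0
+# and kappa -> 1 limits recover the ASNE- and NeRV-like behavior noted above.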
+jse <- function(perplexity, + kappa = 0.5, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + eps0 <- 1e-5 + kappa <- max(kappa, eps0) + kappa <- min(kappa, 1 - eps0) + + kappa_inv <- 1 / kappa + m2_kappa_inv <- -2 * kappa_inv + om_kappa <- 1 - kappa + om_kappa_inv <- 1 / om_kappa + + lreplace( + ssne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + cost$plogp <- rowSums(P * logm(P, eps)) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + eps <- cost$eps + + cost$pcost <- + om_kappa_inv * (cost$plogp - rowSums(cost$P * logm(cost$Z, eps))) + + kappa_inv * cost$QlQZc + + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + K <- m2_kappa_inv * cost$Q * (cost$lQZ - cost$QlQZc) + cost$G <- k2g(Y, K, symmetrize = TRUE) + + cost + }, + update = function(cost, Y) { + eps <- cost$eps + + Q <- expQ( + Y, + eps = eps, + is_symmetric = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q + + Z <- kappa * cost$P + om_kappa * Q + Z[Z < eps] <- eps + diag(Z) <- 0 + + cost$Q <- Q + cost$Z <- Z + cost$lQZ <- logm(Q / Z, eps) + cost$QlQZc <- rowSums(Q * cost$lQZ) + + cost + }, + export = cost_export + ) +} + + +sjse <- function(perplexity, + kappa = 0.5, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + eps0 <- 1e-5 + kappa <- max(kappa, eps0) + kappa <- min(kappa, 1 - eps0) + + kappa_inv <- 1 / kappa + m4_kappa_inv <- -4 * kappa_inv + om_kappa <- 1 - kappa + om_kappa_inv <- 1 / om_kappa + + lreplace( + ssne( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps, + n_threads = n_threads, + use_cpp = use_cpp + ), + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + eps <- cost$eps + + cost$pcost <- + om_kappa_inv * (cost$plogp - colSums(cost$P * logm(cost$Z, eps))) + + kappa_inv * cost$QlQZc + + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + K <- m4_kappa_inv * cost$Q * (cost$lQZ - cost$QlQZs) + cost$G <- k2g(Y, K, symmetrize = FALSE) + + cost + }, + update = function(cost, Y) { + eps <- cost$eps + + Q <- expQ( + Y, + cost$eps, + is_symmetric = TRUE, + matrix_normalize = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q + Z <- kappa * cost$P + om_kappa * Q + Z[Z < eps] <- eps + diag(Z) <- 0 + + cost$Q <- Q + cost$Z <- Z + cost$lQZ <- logm(Q / Z, eps) + cost$QlQZc <- colSums(Q * cost$lQZ) + cost$QlQZs <- sum(cost$QlQZc) + + cost + }, + export = cost_export + ) +} + +rsrjse <- function(perplexity, + kappa = 0.5, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + jse( + perplexity = perplexity, + kappa = kappa, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "symmetric", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + P <- cost$P + P <- P / rowSums(P) + cost$P <- P + + cost$eps <- 
eps + cost + } + ) +} diff --git a/smallvis/R/largevis.R b/smallvis/R/largevis.R new file mode 100644 index 0000000..2df54bd --- /dev/null +++ b/smallvis/R/largevis.R @@ -0,0 +1,71 @@ +# LargeVis ---------------------------------------------------------------- + +largevis <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + gamma = 1, + gr_eps = 0.1, + normalize = TRUE, + eps = 1e-9, + row_weight = NULL, + use_cpp = FALSE, + n_threads = 0) { + if (!is.null(row_weight)) { + row_normalize <- row_weight + } else { + row_normalize <- TRUE + } + lreplace( + tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + cost <- sne_init( + cost, + X = X, + perplexity = perplexity, + symmetrize = symmetrize, + kernel = inp_kernel, + normalize = normalize, + verbose = verbose, + row_normalize = row_normalize, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost$greps1 <- gr_eps - 1 + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + P <- cost$P + eps <- cost$eps + W <- cost$W + + cost$pcost <- colSums(-P * logm(W, eps) - gamma * log1p(-W + eps)) + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + W <- cost$W + cost$G <- k2g(Y, 4 * W * (cost$P - ((gamma * W) / ( + 1 + cost$greps1 * W + )))) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + + cost$W <- W + cost + }, + export = cost_export + ) +} diff --git a/smallvis/R/nerv.R b/smallvis/R/nerv.R new file mode 100644 index 0000000..5202828 --- /dev/null +++ b/smallvis/R/nerv.R @@ -0,0 +1,281 @@ +# NeRV -------------------------------------------------------------------- + +# Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010). +# Information retrieval perspective to nonlinear dimensionality reduction for +# data visualization. +# \emph{Journal of Machine Learning Research}, \emph{11}, 451-490. 
+# +# Unlike original publication, won't transfer input precisions to output kernel +# lambda = 1 gives ASNE results +# default lambda = 0.9 from "Majorization-Minimization for Manifold Embedding" +# Yang, Peltonen, Kaski 2015 +nerv <- function(perplexity, + lambda = 0.9, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lambda2 <- 2 * lambda + oml <- 1 - lambda + oml2 <- 2 * oml + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + }, + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + cost$P <- P + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + P <- cost$P + + kl_fwd <- rowSums(P * cost$lPQ) + + cost$pcost <- lambda * kl_fwd + oml * cost$kl_rev + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + + # Total K including multiplying by 2 in gradient + K <- lambda2 * (cost$P - Q) + oml2 * Q * (cost$lPQ + cost$kl_rev) + + cost$G <- k2g(Y, K, symmetrize = TRUE) + cost + }, + update = function(cost, Y) { + eps <- cost$eps + + Q <- expQ( + Y, + eps, + is_symmetric = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q + cost$Q <- Q + + # Reverse KL gradient + cost$lPQ <- logm(cost$P / Q, eps) + # for KLrev we want Q * log(Q/P), so take -ve of log(P/Q) + cost$kl_rev <- rowSums(Q * -cost$lPQ) + cost + }, + sentinel = "Q", + export = function(cost, val) { + res <- cost_export(cost, val) + res + } + ) +} + +snerv <- function(perplexity, + lambda = 0.9, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lambda4 <- 4 * lambda + oml <- 1 - lambda + oml4 <- 4 * oml + lreplace( + ssne( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps, + n_threads = n_threads + ), + cache_input = function(cost) { + eps <- cost$eps + P <- cost$P + P[P < eps] <- eps + cost$P <- P + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + P <- cost$P + + kl_fwd <- colSums(P * cost$lPQ) + + cost$pcost <- lambda * kl_fwd + oml * cost$kl_rev + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + Q <- cost$Q + + # Total K including multiplying by 4 in gradient + K <- lambda4 * (cost$P - Q) + oml4 * Q * (cost$lPQ + cost$QlPQs) + + cost$G <- k2g(Y, K, symmetrize = FALSE) + cost + }, + update = function(cost, Y) { + eps <- cost$eps + + Q <- expQ( + Y, + eps, + is_symmetric = TRUE, + matrix_normalize = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q + cost$Q <- Q + + # Reverse KL gradient + cost$lPQ <- logm(cost$P / Q, eps) + cost$kl_rev <- colSums(Q * -cost$lPQ) + cost$QlPQs <- sum(cost$kl_rev) + cost + }, + export = function(cost, val) { + res <- cost_export(cost, val) + res + } + ) +} + +# NeRV with input bandwidths transferred to the output kernel, as in the +# original paper. 
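+#
+# In outline (my gloss on the code below): the per-point precisions beta
+# found during input calibration are requested via ret_extra = "beta" in
+# init() and passed to expQ() in update(), so the output weights behave like
+# exp(-beta_i * d2_ij) rather than the unit-bandwidth exp(-d2_ij) of nerv().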
+bnerv <- function(perplexity,
+                  lambda = 0.9,
+                  eps = .Machine$double.eps,
+                  n_threads = 0,
+                  use_cpp = FALSE) {
+  lambda2 <- 2 * lambda
+  oml <- 1 - lambda
+  oml2 <- 2 * oml
+  lreplace(
+    nerv(
+      perplexity = perplexity,
+      lambda = lambda,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      ret_extra <- unique(c(ret_extra, "beta"))
+
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        symmetrize = "none",
+        normalize = FALSE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      cost$eps <- eps
+      cost$lambda2b <- lambda2 * cost$beta
+      cost$oml2b <- oml2 * cost$beta
+      cost
+    },
+    gr = function(cost, Y) {
+      cost <- cost_update(cost, Y)
+      Q <- cost$Q
+
+      # Total K including multiplying by 2 * beta in gradient
+      K <- cost$lambda2b * (cost$P - Q) + cost$oml2b * Q * (cost$lPQ + cost$kl_rev)
+
+      cost$G <- k2g(Y, K, symmetrize = TRUE)
+      cost
+    },
+    update = function(cost, Y) {
+      eps <- cost$eps
+
+      Q <- expQ(
+        Y,
+        eps,
+        beta = cost$beta,
+        is_symmetric = FALSE,
+        use_cpp = use_cpp,
+        n_threads = n_threads
+      )$Q
+      cost$Q <- Q
+
+      # Reverse KL gradient
+      cost$lPQ <- logm(cost$P / Q, eps)
+      cost$kl_rev <- rowSums(Q * -cost$lPQ)
+
+      cost
+    }
+  )
+}
+
+rsrnerv <- function(perplexity,
+                    lambda = 0.9,
+                    eps = .Machine$double.eps,
+                    n_threads = 0,
+                    use_cpp = FALSE) {
+  lreplace(
+    nerv(
+      perplexity = perplexity,
+      lambda = lambda,
+      use_cpp = use_cpp,
+      n_threads = n_threads
+    ),
+    init = function(cost,
+                    X,
+                    max_iter,
+                    verbose = FALSE,
+                    ret_extra = c()) {
+      cost <- sne_init(
+        cost,
+        X,
+        perplexity = perplexity,
+        symmetrize = "symmetric",
+        normalize = FALSE,
+        verbose = verbose,
+        ret_extra = ret_extra,
+        n_threads = n_threads,
+        use_cpp = use_cpp
+      )
+      P <- cost$P
+      P <- P / rowSums(P)
+      cost$P <- P
+
+      cost$eps <- eps
+      cost
+    }
+  )
+}

From 4c851ab80400dabecd3bb4594bed503e9ba66bb4 Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sat, 3 Aug 2024 12:30:38 -0700
Subject: [PATCH 17/26] reorganize some SNE functions

---
 smallvis/R/cost.R       |  94 +++++++++++-
 smallvis/R/ee.R         |  76 ++++++++++
 smallvis/R/perplexity.R | 204 ++++++++++++++++++++++
 smallvis/R/sne.R        | 316 ----------------------------------------
 4 files changed, 369 insertions(+), 321 deletions(-)

diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R
index ec18e95..524b29b 100644
--- a/smallvis/R/cost.R
+++ b/smallvis/R/cost.R
@@ -1,5 +1,13 @@
 # Generic Weight/Cost/Gradient functions ----------------------------------
 
+# Convert Force constant to Gradient
+k2g <- function(Y, K, symmetrize = FALSE) {
+  if (symmetrize) {
+    K <- K + t(K)
+  }
+  Y * colSums(K) - (K %*% Y)
+}
+
 logm <- function(m, eps = .Machine$double.eps) {
   diag(m) <- eps
   m <- log(m)
@@ -21,12 +29,88 @@ divm <- function(m, n, eps = .Machine$double.eps) {
 powm <- function(m, n, eps = .Machine$double.eps) {
   diag(m) <- eps
   m <- m^n
   diag(m) <- 0
   m
 }
 
-# Convert Force constant to Gradient
-k2g <- function(Y, K, symmetrize = FALSE) {
-  if (symmetrize) {
-    K <- K + t(K)
-  }
-  Y * colSums(K) - (K %*% Y)
-}
-
+# Calculates shifted exponential column-wise: exp(X - a)
+# where a is the column max.
+# This is the log-sum-exp trick to avoid numeric underflow:
+# log sum_i exp x_i = a + log sum_i exp(x_i - a)
+# => sum_i exp x_i = exp a * sum_i exp(x_i - a)
+# with a = max x_i
+# exp(max x_i) can still underflow so we don't return Z (the sum)
+# Use Q directly (exp a appears in numerator and denominator, so cancels).
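+#
+# A tiny numeric illustration (mine, not part of the package):
+#   x <- c(-1000, -999)       # e.g. -beta * d2 for far-apart points
+#   exp(x) / sum(exp(x))      # 0 / 0 -> NaN
+#   w <- exp(x - max(x))      # c(exp(-1), 1): no underflow
+#   w / sum(w)                # ~ c(0.269, 0.731)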
+# https://statmodeling.stat.columbia.edu/2016/06/11/log-sum-of-exponentials/ +# https://www.xarg.org/2016/06/the-log-sum-exp-trick-in-machine-learning/ +# http://wittawat.com/posts/log-sum_exp_underflow.html +exp_shift <- function(X) { + X <- exp(sweep(X, 2, apply(X, 2, max))) +} + +expQ <- function(Y, eps = .Machine$double.eps, beta = NULL, + A = NULL, + is_symmetric = FALSE, + matrix_normalize = FALSE, + use_cpp = FALSE, + n_threads = 1) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + + if (!is.null(beta)) { + W <- exp_shift(-W * beta) } - Y * colSums(K) - (K %*% Y) + else { + W <- exp_shift(-W) + } + + if (!is.null(A)) { + W <- A * W + } + diag(W) <- 0 + + if (matrix_normalize) { + Z <- sum(W) + } + else { + if (is_symmetric) { + Z <- colSums(W) + } + else { + Z <- rowSums(W) + } + } + # cost of division (vs storing 1/Z and multiplying) seems small + Q <- W / Z + + if (eps > 0) { + Q[Q < eps] <- eps + } + diag(Q) <- 0 + + list( + Q = Q, + Z = Z + ) +} + +# KL divergence using Q directly +kl_costQ <- function(cost, Y) { + cost <- cost_update(cost, Y) + + # P log(P / Q) = P log P - P log Q + cost$pcost <- cost$plogp - colSums(cost$P * logm(cost$Q, cost$eps)) + cost +} + +kl_costQr <- function(cost, Y) { + cost <- cost_update(cost, Y) + + # P log(P / Q) = P log P - P log Q + cost$pcost <- cost$plogp - rowSums(cost$P * logm(cost$Q, cost$eps)) + cost +} + +kl_cost <- function(cost, Y) { + cost <- cost_update(cost, Y) + # P log(P / Q) = P log P - P log Q + cost$pcost <- cost$plogp - colSums(cost$P * logm(cost$W / cost$Z, cost$eps)) + cost } cost_init <- function(cost, diff --git a/smallvis/R/ee.R b/smallvis/R/ee.R index 87be379..1d8911b 100644 --- a/smallvis/R/ee.R +++ b/smallvis/R/ee.R @@ -83,3 +83,79 @@ ee <- function(perplexity, sentinel = "W" ) } + + +# t-Distributed Elastic Embedding +# EE-like cost function in terms of I-Divergence +# Scaled to give a gradient similar in form to t-SNE +tee <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + lambda = 0.01, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + ret_extra <- unique(c(ret_extra, "V", "dint")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + V <- cost$P + cost$eps <- eps + cost$invN <- 1 / sum(V) + cost$gradconst <- 4 * cost$invN + cost$lambda <- lambda + + V[V < eps] <- eps + cost$constV <- cost$invN * (colSums(V * logm(V, eps)) - lambda * colSums(V)) + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$G <- k2g(Y, cost$gradconst * cost$W * (cost$P - cost$W * cost$lambda)) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + cost$W <- W + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + V <- cost$P + W <- cost$W + eps <- cost$eps + + cost$pcost <- cost$constV + + cost$invN * (cost$lambda * colSums(W) - colSums(V * logm(W, eps))) + cost + }, + exaggerate = function(cost, exaggeration_factor) { + cost$V <- cost$V * exaggeration_factor + cost + } + ) +} diff --git a/smallvis/R/perplexity.R 
b/smallvis/R/perplexity.R index 32cc502..d2e59f1 100644 --- a/smallvis/R/perplexity.R +++ b/smallvis/R/perplexity.R @@ -440,3 +440,207 @@ user_idp_perps <- function(perplexity) { } perplexities } + +# Some symmetrization options mean "actually, no symmetrization please". This +# function returns the ones that will actually produce a symmetric matrix, +# necessary for symmetric methods (e.g. tsne vs asne). +true_symmetrize_options <- function() { + c("symmetric", "average", "mutual", "umap", "fuzzy") +} + +scale_affinities <- function(P, + symmetrize = "symmetric", + row_normalize = TRUE, + normalize = TRUE) { + # row normalization before anything else + if (nnat(row_normalize)) { + if (symmetrize == "rowsymm") { + P <- 0.5 * (P + t(P)) + symmetrize <- "none" + } + P <- P / rowSums(P) + } else if (is.numeric(row_normalize)) { + P <- row_normalize * P / rowSums(P) + } + + # Symmetrize + P <- switch(symmetrize, + none = P, + symmetric = 0.5 * (P + t(P)), + average = 0.5 * (P + t(P)), + mutual = sqrt(P * t(P)), + umap = fuzzy_set_union(P), + fuzzy = fuzzy_set_union(P), + stop("unknown symmetrization: ", symmetrize) + ) + # Normalize + if (normalize) { + P <- P / sum(P) + } + P +} + +sne_init <- function(cost, + X, + perplexity, + kernel = "gaussian", + symmetrize = "symmetric", + row_normalize = TRUE, + normalize = TRUE, + n_threads = 0, + use_cpp = use_cpp, + verbose = FALSE, + ret_extra = c()) { + if (tolower(kernel) == "knn") { + if (is.character(perplexity) || is.list(perplexity)) { + stop("Can't use intrinsic dimensionality with knn kernel") + } + if (length(perplexity) > 1) { + stop("Can't use multiple perplexities with knn kernel") + } + tsmessage("Using knn kernel with k = ", formatC(perplexity)) + P <- knn_graph(X, + k = perplexity, + n_threads = n_threads, + verbose = verbose + ) + x2ares <- list(W = P) + } else if (tolower(kernel) == "skd") { + P <- smooth_knn_distances( + X, + k = perplexity, + tol = 1e-5, + n_threads = n_threads, + verbose = verbose + )$P + row_normalize <- FALSE + x2ares <- list(W = P) + } else if (perp_method(perplexity) == "idp") { + perplexities <- NULL + if (is.list(perplexity) && length(perplexity) == 2) { + perplexities <- perplexity[[2]] + } + + x2ares <- idp(X, + perplexities = perplexities, + tol = 1e-5, + verbose = verbose + ) + P <- x2ares$W + ret_extra <- unique(c(ret_extra, "idp")) + } else if (perp_method(perplexity) == "multiscale") { + perplexities <- NULL + if (is.list(perplexity) && length(perplexity) == 2) { + perplexities <- perplexity[[2]] + } + + mspres <- msp( + X, + perplexities = perplexities, + tol = 1e-5, + symmetrize = symmetrize, + row_normalize = row_normalize, + normalize = normalize, + verbose = verbose + ) + cost$P <- mspres$P + return(cost) + } else if (tolower(kernel) == "sigma") { + tsmessage("Using fixed sigma = ", formatC(perplexity)) + x2ares <- x2aff_sigma( + X, + sigma = perplexity, + n_threads = n_threads, + use_cpp = use_cpp, + verbose = verbose + ) + P <- x2ares$W + } else { + if (!is.numeric(perplexity)) { + stop("Unknown perplexity method, '", perplexity[[1]], "'") + } + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity) + ) + if (use_cpp) { + P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W + } else { + x2ares <- x2aff( + X, + perplexity, + tol = 1e-5, + kernel = kernel, + verbose = verbose + ) + P <- x2ares$W + } + } + + P <- scale_affinities( + P, + symmetrize = symmetrize, + row_normalize = row_normalize, + normalize = normalize + ) + cost$P <- P + + if 
(is.logical(row_normalize)) { + tsmessage( + "Effective perplexity of P approx = ", + formatC(stats::median(perpp(P))) + ) + } + + for (r in unique(tolower(ret_extra))) { + switch(r, + v = { + cost$V <- x2ares$W + }, + dint = { + if (!is.null(x2ares$dint)) { + cost$dint <- x2ares$dint + } + }, + beta = { + if (!is.null(x2ares$beta)) { + cost$beta <- x2ares$beta + } + }, + adegc = { + cost$adegc <- 0.5 * rowSums(x2ares$W) + colSums(x2ares$W) + }, + adegin = { + cost$adegin <- rowSums(x2ares$W) + }, + adegout = { + cost$adegout <- colSums(x2ares$W) + }, + pdeg = { + cost$pdeg <- colSums(P) + }, + idp = { + if (!is.null(x2ares$idp)) { + cost$idp <- x2ares$idp + } + } + ) + } + cost +} + +# The intrinsic dimensionality associated with a gaussian affinity vector +# Convenient only from in x2aff, where all these values are available +intd_x2aff <- function(D2, beta, W, Z, H, eps = .Machine$double.eps) { + P <- W / Z + -2 * beta * sum(D2 * P * (log(P + eps) + H)) +} + +shannonpr <- function(P, eps = .Machine$double.eps) { + P <- P / rowSums(P) + rowSums(-P * log(P + eps)) +} + +perpp <- function(P) { + exp(shannonpr(P)) +} diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R index e58c611..318432b 100644 --- a/smallvis/R/sne.R +++ b/smallvis/R/sne.R @@ -1,87 +1,3 @@ -# Calculates shifted exponential column-wise: exp(X - a) -# where a is the column max. -# This is the log-sum-exp trick to avoid numeric underflow: -# log sum_i exp x_i = a + log sum_i exp(x_i - a) -# => sum_i exp x_i = exp a * sum_i exp(x_i - a) -# with a = max x_i -# exp(max x_i) can still underflow so we don't return Z (the sum) -# Use Q directly (exp a appears in numerator and denominator, so cancels). -# https://statmodeling.stat.columbia.edu/2016/06/11/log-sum-of-exponentials/ -# https://www.xarg.org/2016/06/the-log-sum-exp-trick-in-machine-learning/ -# http://wittawat.com/posts/log-sum_exp_underflow.html -exp_shift <- function(X) { - X <- exp(sweep(X, 2, apply(X, 2, max))) -} - -expQ <- function(Y, eps = .Machine$double.eps, beta = NULL, - A = NULL, - is_symmetric = FALSE, - matrix_normalize = FALSE, - use_cpp = FALSE, - n_threads = 1) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - - if (!is.null(beta)) { - W <- exp_shift(-W * beta) - } - else { - W <- exp_shift(-W) - } - - if (!is.null(A)) { - W <- A * W - } - diag(W) <- 0 - - if (matrix_normalize) { - Z <- sum(W) - } - else { - if (is_symmetric) { - Z <- colSums(W) - } - else { - Z <- rowSums(W) - } - } - # cost of division (vs storing 1/Z and multiplying) seems small - Q <- W / Z - - if (eps > 0) { - Q[Q < eps] <- eps - } - diag(Q) <- 0 - - list( - Q = Q, - Z = Z - ) -} - -# KL divergence using Q directly -kl_costQ <- function(cost, Y) { - cost <- cost_update(cost, Y) - - # P log(P / Q) = P log P - P log Q - cost$pcost <- cost$plogp - colSums(cost$P * logm(cost$Q, cost$eps)) - cost -} - -kl_costQr <- function(cost, Y) { - cost <- cost_update(cost, Y) - - # P log(P / Q) = P log P - P log Q - cost$pcost <- cost$plogp - rowSums(cost$P * logm(cost$Q, cost$eps)) - cost -} - -kl_cost <- function(cost, Y) { - cost <- cost_update(cost, Y) - # P log(P / Q) = P log P - P log Q - cost$pcost <- cost$plogp - colSums(cost$P * logm(cost$W / cost$Z, cost$eps)) - cost -} - # t-SNE tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", normalize = TRUE, row_normalize = TRUE, @@ -512,61 +428,6 @@ pstsne <- function(perplexity, inp_kernel = "gaussian", ) } -# t-Distributed Elastic Embedding -# EE-like cost function in terms of I-Divergence -# Scaled to 
give a gradient similar in form to t-SNE -tee <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", - lambda = 0.01, eps = .Machine$double.eps, n_threads = 0, - use_cpp = FALSE) { - lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - ret_extra = unique(c(ret_extra, "V", "dint")) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - V <- cost$P - cost$eps <- eps - cost$invN <- 1 / sum(V) - cost$gradconst <- 4 * cost$invN - cost$lambda <- lambda - - V[V < eps] <- eps - cost$constV <- cost$invN * (colSums(V * logm(V, eps)) - lambda * colSums(V)) - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, cost$gradconst * cost$W * (cost$P - cost$W * cost$lambda)) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - cost$W <- W - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - V <- cost$P - W <- cost$W - eps <- cost$eps - - cost$pcost <- cost$constV + - cost$invN * (cost$lambda * colSums(W) - colSums(V * logm(W, eps))) - cost - }, - exaggerate = function(cost, exaggeration_factor) { - cost$V <- cost$V * exaggeration_factor - cost - } - ) -} - # UMAP/t-SNE Hybrids ------------------------------------------------------ # Calculate P via normalized smooth knn-distances @@ -951,181 +812,4 @@ arsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0, ) } -# Perplexity Calibration -------------------------------------------------- - -# Some symmetrization options mean "actually, no symmetrization please". This -# function returns the ones that will actually produce a symmetric matrix, -# necessary for symmetric methods (e.g. tsne vs asne). 
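Since the affinity post-processing below is moving wholesale into perplexity.R, a compact sketch of what the common t-SNE path through the moved scale_affinities() amounts to may help review; V is a hypothetical raw affinity matrix, and this mirrors, but is not, the package code:

    set.seed(42)
    V <- matrix(runif(9), nrow = 3)
    diag(V) <- 0
    P <- V / rowSums(V)    # row normalization: each row sums to 1
    P <- 0.5 * (P + t(P))  # symmetrize = "symmetric": average P_ij and P_ji
    P <- P / sum(P)        # normalize: the whole matrix sums to 1
    sum(P)                 # 1, as the KL-based costs expect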
-true_symmetrize_options <- function() { - c("symmetric", "average", "mutual", "umap", "fuzzy") -} - -scale_affinities <- function(P, symmetrize = "symmetric", row_normalize = TRUE, - normalize = TRUE) { - # row normalization before anything else - if (nnat(row_normalize)) { - if (symmetrize == "rowsymm") { - P <- 0.5 * (P + t(P)) - symmetrize <- "none" - } - P <- P / rowSums(P) - } - else if (is.numeric(row_normalize)) { - P <- row_normalize * P / rowSums(P) - } - - # Symmetrize - P <- switch(symmetrize, - none = P, - symmetric = 0.5 * (P + t(P)), - average = 0.5 * (P + t(P)), - mutual = sqrt(P * t(P)), - umap = fuzzy_set_union(P), - fuzzy = fuzzy_set_union(P), - stop("unknown symmetrization: ", symmetrize)) - # Normalize - if (normalize) { - P <- P / sum(P) - } - P -} - -sne_init <- function(cost, - X, - perplexity, - kernel = "gaussian", - symmetrize = "symmetric", - row_normalize = TRUE, - normalize = TRUE, - n_threads = 0, - use_cpp = use_cpp, - verbose = FALSE, - ret_extra = c()) { - - if (tolower(kernel) == "knn") { - if (is.character(perplexity) || is.list(perplexity)) { - stop("Can't use intrinsic dimensionality with knn kernel") - } - if (length(perplexity) > 1) { - stop("Can't use multiple perplexities with knn kernel") - } - tsmessage("Using knn kernel with k = ", formatC(perplexity)) - P <- knn_graph(X, k = perplexity, n_threads = n_threads, verbose = verbose) - x2ares <- list(W = P) - } - else if (tolower(kernel) == "skd") { - P <- smooth_knn_distances(X, k = perplexity, tol = 1e-5, - n_threads = n_threads, verbose = verbose)$P - row_normalize <- FALSE - x2ares <- list(W = P) - } - else if (perp_method(perplexity) == "idp") { - perplexities <- NULL - if (is.list(perplexity) && length(perplexity) == 2) { - perplexities <- perplexity[[2]] - } - - x2ares <- idp(X, perplexities = perplexities, tol = 1e-5, - verbose = verbose) - P <- x2ares$W - ret_extra <- unique(c(ret_extra, "idp")) - } - else if (perp_method(perplexity) == "multiscale") { - perplexities <- NULL - if (is.list(perplexity) && length(perplexity) == 2) { - perplexities <- perplexity[[2]] - } - - mspres <- msp(X, perplexities = perplexities, tol = 1e-5, - symmetrize = symmetrize, - row_normalize = row_normalize, - normalize = normalize, - verbose = verbose) - cost$P <- mspres$P - return(cost) - } - else if (tolower(kernel) == "sigma") { - tsmessage("Using fixed sigma = ", formatC(perplexity)) - x2ares <- x2aff_sigma(X, sigma = perplexity, n_threads = n_threads, - use_cpp = use_cpp, verbose = verbose) - P <- x2ares$W - } - else { - if (!is.numeric(perplexity)) { - stop("Unknown perplexity method, '", perplexity[[1]], "'") - } - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) - if (use_cpp) { - P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W - } - else { - x2ares <- x2aff(X, perplexity, tol = 1e-5, kernel = kernel, verbose = verbose) - P <- x2ares$W - } - } - - P <- scale_affinities(P, - symmetrize = symmetrize, - row_normalize = row_normalize, - normalize = normalize) - cost$P <- P - - if (is.logical(row_normalize)) { - tsmessage("Effective perplexity of P approx = ", - formatC(stats::median(perpp(P)))) - } - - for (r in unique(tolower(ret_extra))) { - switch(r, - v = { - cost$V <- x2ares$W - }, - dint = { - if (!is.null(x2ares$dint)) { - cost$dint <- x2ares$dint - } - }, - beta = { - if (!is.null(x2ares$beta)) { - cost$beta <- x2ares$beta - } - }, - adegc = { - cost$adegc <- 0.5 * rowSums(x2ares$W) + colSums(x2ares$W) - }, - adegin = { - cost$adegin <- 
rowSums(x2ares$W) - }, - adegout = { - cost$adegout <- colSums(x2ares$W) - }, - pdeg = { - cost$pdeg <- colSums(P) - }, - idp = { - if (!is.null(x2ares$idp)) { - cost$idp <- x2ares$idp - } - } - ) - } - cost -} - -# The intrinsic dimensionality associated with a gaussian affinity vector -# Convenient only from in x2aff, where all these values are available -intd_x2aff <- function(D2, beta, W, Z, H, eps = .Machine$double.eps) { - P <- W / Z - -2 * beta * sum(D2 * P * (log(P + eps) + H)) -} - -shannonpr <- function(P, eps = .Machine$double.eps) { - P <- P / rowSums(P) - rowSums(-P * log(P + eps)) -} - -perpp <- function(P) { - exp(shannonpr(P)) -} From 4daa1a773ee5f7c84eb2860805ad35e0dc9f967d Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 12:32:36 -0700 Subject: [PATCH 18/26] reformat --- smallvis/R/cost.R | 32 +- smallvis/R/sne.R | 1097 ++++++++++++++++++++++++++++++--------------- 2 files changed, 761 insertions(+), 368 deletions(-) diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index 524b29b..de9219c 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -44,55 +44,51 @@ exp_shift <- function(X) { X <- exp(sweep(X, 2, apply(X, 2, max))) } -expQ <- function(Y, eps = .Machine$double.eps, beta = NULL, +expQ <- function(Y, + eps = .Machine$double.eps, + beta = NULL, A = NULL, is_symmetric = FALSE, matrix_normalize = FALSE, use_cpp = FALSE, n_threads = 1) { W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - + if (!is.null(beta)) { W <- exp_shift(-W * beta) - } - else { + } else { W <- exp_shift(-W) } - + if (!is.null(A)) { W <- A * W } diag(W) <- 0 - + if (matrix_normalize) { Z <- sum(W) - } - else { + } else { if (is_symmetric) { Z <- colSums(W) - } - else { + } else { Z <- rowSums(W) } } # cost of division (vs storing 1/Z and multiplying) seems small Q <- W / Z - + if (eps > 0) { Q[Q < eps] <- eps } diag(Q) <- 0 - - list( - Q = Q, - Z = Z - ) + + list(Q = Q, Z = Z) } # KL divergence using Q directly kl_costQ <- function(cost, Y) { cost <- cost_update(cost, Y) - + # P log(P / Q) = P log P - P log Q cost$pcost <- cost$plogp - colSums(cost$P * logm(cost$Q, cost$eps)) cost @@ -100,7 +96,7 @@ kl_costQ <- function(cost, Y) { kl_costQr <- function(cost, Y) { cost <- cost_update(cost, Y) - + # P log(P / Q) = P log P - P log Q cost$pcost <- cost$plogp - rowSums(cost$P * logm(cost$Q, cost$eps)) cost diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R index 318432b..e6acda2 100644 --- a/smallvis/R/sne.R +++ b/smallvis/R/sne.R @@ -1,16 +1,32 @@ # t-SNE -tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", - normalize = TRUE, row_normalize = TRUE, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +tsne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + normalize = TRUE, + row_normalize = TRUE, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { list( - init = function(cost, X, max_iter, verbose = FALSE, + init = function(cost, + X, + max_iter, + verbose = FALSE, ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = normalize, - row_normalize = row_normalize, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = normalize, + row_normalize = row_normalize, + verbose = verbose, + 
ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) cost$eps <- eps cost }, @@ -30,8 +46,7 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", P <- cost$P if (use_cpp) { cost$G <- tsne_grad_cpp(P, cost$W, cost$Z, Y, n_threads = n_threads) - } - else { + } else { cost$G <- k2g(Y, 4 * cost$W * (P - cost$W / cost$Z)) } cost @@ -41,9 +56,10 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", if (is.null(res)) { switch(val, - q = { - res <- cost$W / cost$Z - }) + q = { + res <- cost$W / cost$Z + } + ) } res }, @@ -66,23 +82,37 @@ tsne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", # Cook, J., Sutskever, I., Mnih, A., & Hinton, G. E. (2007). # Visualizing similarity data with a mixture of maps. # In \emph{International Conference on Artificial Intelligence and Statistics} (pp. 67-74). -ssne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { +ssne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads, - use_cpp = use_cpp), + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps, + n_threads = n_threads, + use_cpp = use_cpp + ), pfn = kl_costQ, gr = function(cost, Y) { cost <- cost$update(cost, Y) cost$G <- k2g(Y, 4 * (cost$P - cost$Q), symmetrize = FALSE) - + cost }, update = function(cost, Y) { - cost$Q <- expQ(Y, cost$eps, is_symmetric = TRUE, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q + cost$Q <- expQ( + Y, + cost$eps, + is_symmetric = TRUE, + matrix_normalize = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q cost }, sentinel = "Q" @@ -92,15 +122,30 @@ ssne <- function(perplexity, inp_kernel = "gaussian", # Hinton, G. E., & Roweis, S. T. (2002). # Stochastic neighbor embedding. # In \emph{Advances in neural information processing systems} (pp. 833-840). 
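The reformatted asne() below differs from ssne() above only in how Q is normalized. A minimal sketch of that distinction, leaving out the log-sum-exp shift that expQ() applies; Y here is a hypothetical embedding:

    set.seed(42)
    Y <- matrix(rnorm(10), ncol = 2)
    D2 <- as.matrix(dist(Y))^2
    W <- exp(-D2)
    diag(W) <- 0
    Q_asne <- W / rowSums(W) # point-wise: each point normalizes over its own neighbors
    Q_ssne <- W / sum(W)     # pair-wise: one normalizer shared by every pair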
-asne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, +asne <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) cost$eps <- eps cost }, @@ -112,8 +157,13 @@ asne <- function(perplexity, inp_kernel = "gaussian", cost }, update = function(cost, Y) { - cost$Q <- expQ(Y, eps = cost$eps, is_symmetric = FALSE, - use_cpp = use_cpp, n_threads = n_threads)$Q + cost$Q <- expQ( + Y, + eps = cost$eps, + is_symmetric = FALSE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q cost }, sentinel = "Q" @@ -124,14 +174,23 @@ asne <- function(perplexity, inp_kernel = "gaussian", # Yang, Z., King, I., Xu, Z., & Oja, E. (2009). # Heavy-tailed symmetric stochastic neighbor embedding. # In \emph{Advances in neural information processing systems} (pp. 2169-2177). -hssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { +hssne <- function(perplexity, + alpha = 0.5, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { alpha <- max(alpha, 1e-8) apow <- -1 / alpha lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads), + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps, + n_threads = n_threads + ), gr = function(cost, Y) { cost <- cost_update(cost, Y) # to include bandwidth @@ -156,21 +215,40 @@ hssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", # exists to demonstrate that constant beta doesn't have any meaningful effect # on the results. 
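For review, a sketch of the heavy-tailed output kernel this HSSNE family interpolates with, matching the powm(cost$ab * W + 1, cost$apow) expression used below; het_kernel is a hypothetical helper, not part of the package:

    # w = (1 + alpha * beta * d2)^(-1 / alpha); alpha = 1 gives the t-SNE
    # Cauchy kernel, alpha -> 0 approaches the SSNE gaussian exp(-beta * d2)
    het_kernel <- function(d2, alpha = 0.5, beta = 1) {
      (1 + alpha * beta * d2)^(-1 / alpha)
    }
    d2 <- 4
    het_kernel(d2, alpha = 1)    # 0.2 = 1 / (1 + d2)
    het_kernel(d2, alpha = 1e-8) # ~0.0183, close to exp(-d2)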
-bhssne <- function(perplexity, alpha = 0.5, beta = 1, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +bhssne <- function(perplexity, + alpha = 0.5, + beta = 1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { alpha <- max(alpha, 1e-8) beta <- max(beta, 1e-8) lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), b4 = 4 * beta, ab = alpha * beta, apow = -1 / alpha, - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra <- unique(c(ret_extra, 'beta')) - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "beta")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -189,7 +267,7 @@ bhssne <- function(perplexity, alpha = 0.5, beta = 1, W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) W <- powm(cost$ab * W + 1, cost$apow, cost$eps) diag(W) <- 0 - + cost$Z <- sum(W) cost$W <- W cost @@ -198,17 +276,29 @@ bhssne <- function(perplexity, alpha = 0.5, beta = 1, } # A version of HSSNE where alpha is allowed to vary at every epoch -dhssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { +dhssne <- function(perplexity, + alpha = 0.5, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { alpha_min <- 1e-8 alpha <- max(alpha, alpha_min) lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, - symmetrize = symmetrize, eps = eps, n_threads = n_threads), + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + symmetrize = symmetrize, + eps = eps, + n_threads = n_threads + ), gr = function(cost, Y) { cost <- cost_update(cost, Y) - cost$G <- k2g(Y, 4 * (cost$P - cost$W / cost$Z) * powm(cost$W, cost$alpha, cost$eps)) + cost$G <- k2g( + Y, + 4 * (cost$P - cost$W / cost$Z) * powm(cost$W, cost$alpha, cost$eps) + ) cost }, epoch = function(opt, cost, iter, Y, fn_val) { @@ -261,18 +351,37 @@ dhssne <- function(perplexity, alpha = 0.5, inp_kernel = "gaussian", # Optimization equivalence of divergences improves neighbor embedding. # In \emph{Proceedings of the 31st International Conference on Machine Learning (ICML-14)} # (pp. 460-468). 
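The weighted variants that follow scale the output kernel by the importance of each pair, per the Yang et al. reference above. A toy sketch of the importance matrix M that wtsne()/wssne() build in init; P is a hypothetical normalized affinity matrix:

    P <- matrix(c(
      0, 0.2, 0.1,
      0.2, 0, 0.15,
      0.1, 0.15, 0
    ), nrow = 3, byrow = TRUE)
    deg <- colSums(P)    # "pdeg": degree centrality of each observation
    M <- outer(deg, deg) # pairwise importance m_ij = deg_i * deg_j
    # the output weight for pair (i, j) is then scaled by m_ij, so
    # well-connected points carry more weight in the embedding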
-wtsne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity = perplexity, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { +wtsne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) ret_extra <- c(ret_extra, "pdeg") - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) # P matrix degree centrality: column sums deg <- cost$pdeg if (verbose) { @@ -303,56 +412,99 @@ wtsne <- function(perplexity, inp_kernel = "gaussian", ) } -wssne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace(ssne(perplexity = perplexity, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - ret_extra <- c(ret_extra, "pdeg") - cost <- sne_init(cost, X, perplexity = perplexity, - kernel = inp_kernel, symmetrize = symmetrize, - normalize = TRUE, verbose = verbose, - ret_extra = ret_extra, n_threads = n_threads, - use_cpp = use_cpp) - # P matrix degree centrality: column sums - deg <- cost$pdeg - if (verbose) { - summarize(deg, "deg", verbose = verbose) - } - cost$M <- outer(deg, deg) - cost$invM <- 1 / cost$M - cost$eps <- eps - - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, 4 * (cost$P - cost$Q)) - cost - }, - update = function(cost, Y) { - cost$Q <- expQ(Y, cost$eps, A = cost$M, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q - cost - } +wssne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + ssne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) + ret_extra <- c(ret_extra, "pdeg") + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + # P matrix degree centrality: column sums + deg <- cost$pdeg + if (verbose) { + summarize(deg, "deg", verbose = verbose) + } + cost$M <- outer(deg, deg) + cost$invM <- 1 / cost$M + cost$eps <- eps + + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$G <- k2g(Y, 4 * (cost$P - cost$Q)) + cost + }, + update = function(cost, Y) { + cost$Q <- expQ( + Y, + cost$eps, + A = cost$M, + matrix_normalize = TRUE, + use_cpp = use_cpp, 
+ n_threads = n_threads + )$Q + cost + } ) } # t-SNE but with the gradient defined in terms of un-normalized weights # Exists entirely as an academic exercise -tsneu <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +tsneu <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra = unique(c(ret_extra, "V")) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "V")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) cost$eps <- eps @@ -374,17 +526,36 @@ tsneu <- function(perplexity, inp_kernel = "gaussian", # A pseudo-separable approximation of t-SNE, where the output weight sum is only # recalculated during the epoch -pstsne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +pstsne <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra = unique(c(ret_extra, "V")) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "V")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) # need to row-normalize and symmetrize affinities cost$V <- cost$V / rowSums(cost$V) @@ -401,7 +572,7 @@ pstsne <- function(perplexity, inp_kernel = "gaussian", P[P < eps] <- eps cost$plogp <- colSums(P * logm(P, eps)) cost$Z <- nrow(P) * nrow(P) - + cost }, gr = function(cost, Y) { @@ -431,25 +602,59 @@ pstsne <- function(perplexity, inp_kernel = "gaussian", # UMAP/t-SNE Hybrids ------------------------------------------------------ # Calculate P via normalized smooth knn-distances -skdtsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0, +skdtsne <- function(perplexity, + eps = .Machine$double.eps, + n_threads = 0, use_cpp = FALSE) { - tsne(perplexity = perplexity, inp_kernel = "skd", symmetrize = "umap", - eps = eps, n_threads = n_threads, use_cpp = use_cpp) + tsne( + perplexity = perplexity, + inp_kernel = "skd", + symmetrize = "umap", + eps = eps, + n_threads = n_threads, + 
use_cpp = use_cpp + ) } # Use the UMAP curve family in output kernel -usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", - spread = 1, min_dist = 0.001, gr_eps = 0.1, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +usne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + spread = 1, + min_dist = 0.001, + gr_eps = 0.1, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { symmetrize <- match.arg(tolower(symmetrize), true_symmetrize_options()) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = symmetrize, normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost <- init_ab(cost, spread = spread, min_dist = min_dist, verbose = verbose) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = symmetrize, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost <- init_ab(cost, + spread = spread, + min_dist = min_dist, + verbose = verbose + ) cost$eps <- eps cost }, @@ -457,7 +662,7 @@ usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", D2 <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) D2[D2 < 0] <- 0 - W <- 1 / (1 + cost$a * D2 ^ cost$b) + W <- 1 / (1 + cost$a * D2^cost$b) diag(W) <- 0 cost$Z <- sum(W) @@ -474,60 +679,75 @@ usne <- function(perplexity, inp_kernel = "gaussian", symmetrize = "symmetric", } # UMAP cross entropy cost instead of KL divergence -cetsne <- function(perplexity, inp_kernel = "gaussian", - symmetrize = "symmetric", eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - - cost$eps <- eps - cost - }, - cache_input = function(cost) { - P <- cost$P - eps <- cost$eps - P[P < eps] <- eps - cost$Cp <- colSums(P * logm(P, eps) + (1 - P) * log1p(-P)) - cost - }, - pfn = function(cost, Y) { - cost <- cost_update(cost, Y) - - P <- cost$P - eps <- cost$eps - Q <- cost$Q - - cost$pcost <- colSums(-P * logm(Q, eps) - (1 - P) * log1p(-Q + eps)) + cost$Cp - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - cost$G <- k2g(Y, 4 * cost$W * (cost$C - cost$sumC * cost$Q)) - cost - }, - update = function(cost, Y) { - P <- cost$P - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - Q <- W / sum(W) - C <- (P - Q) / (1 - Q) - sumC <- sum(C) - - cost$W <- W - cost$Q <- Q - cost$C <- C - cost$sumC <- sumC - - cost - }, - export = cost_export +cetsne <- function(perplexity, + inp_kernel = "gaussian", + symmetrize = "symmetric", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) 
{ + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + + cost$eps <- eps + cost + }, + cache_input = function(cost) { + P <- cost$P + eps <- cost$eps + P[P < eps] <- eps + cost$Cp <- colSums(P * logm(P, eps) + (1 - P) * log1p(-P)) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + + P <- cost$P + eps <- cost$eps + Q <- cost$Q + + cost$pcost <- colSums(-P * logm(Q, eps) - (1 - P) * log1p(-Q + eps)) + cost$Cp + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$G <- k2g(Y, 4 * cost$W * (cost$C - cost$sumC * cost$Q)) + cost + }, + update = function(cost, Y) { + P <- cost$P + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + + Q <- W / sum(W) + C <- (P - Q) / (1 - Q) + sumC <- sum(C) + + cost$W <- W + cost$Q <- Q + cost$C <- C + cost$sumC <- sumC + + cost + }, + export = cost_export ) } @@ -535,16 +755,37 @@ cetsne <- function(perplexity, inp_kernel = "gaussian", # Bandwidth Experiments -------------------------------------------------- # t-SNE with input kernel bandwidths transferred to output -btsne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity = perplexity, inp_kernel = inp_kernel, - use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra <- unique(c(ret_extra, 'beta')) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) +btsne <- function(perplexity, + inp_kernel = "gaussian", + beta = NULL, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "beta")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -570,49 +811,94 @@ btsne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, } # SSNE with input kernel bandwidths transferred to output -bssne <- function(perplexity, inp_kernel = "gaussian", beta = NULL, - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace(ssne(perplexity = perplexity, inp_kernel = inp_kernel, - use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra <- unique(c(ret_extra, 'beta')) - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "symmetric", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - if (!is.null(beta)) { - cost$beta <- beta - } - cost$eps <- eps - cost +bssne <- function(perplexity, + inp_kernel = "gaussian", + beta = NULL, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + ssne( + perplexity = perplexity, + 
inp_kernel = inp_kernel, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "beta")) + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + if (!is.null(beta)) { + cost$beta <- beta + } + cost$eps <- eps + cost }, pfn = kl_costQ, gr = function(cost, Y) { cost <- cost$update(cost, Y) cost$G <- k2g(Y, 2 * cost$beta * (cost$P - cost$Q), symmetrize = TRUE) - + cost }, update = function(cost, Y) { - cost$Q <- expQ(Y, cost$eps, beta = cost$beta, matrix_normalize = TRUE, - use_cpp = use_cpp, n_threads = n_threads)$Q + cost$Q <- expQ( + Y, + cost$eps, + beta = cost$beta, + matrix_normalize = TRUE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q cost } ) } # ASNE with input kernel bandwidths transferred to output -basne <- function(perplexity, beta = NULL, eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { +basne <- function(perplexity, + beta = NULL, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - asne(perplexity = perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - ret_extra <- unique(c(ret_extra, 'beta')) - - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + asne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + ret_extra <- unique(c(ret_extra, "beta")) + + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) # override input bandwidths with fixed beta (although this doesn't do much) if (!is.null(beta)) { cost$beta <- beta @@ -622,36 +908,50 @@ basne <- function(perplexity, beta = NULL, eps = .Machine$double.eps, cost }, update = function(cost, Y) { - cost$Q <- expQ(Y, eps = cost$eps, beta = cost$beta, is_symmetric = FALSE, - use_cpp = use_cpp, n_threads = n_threads)$Q + cost$Q <- expQ( + Y, + eps = cost$eps, + beta = cost$beta, + is_symmetric = FALSE, + use_cpp = use_cpp, + n_threads = n_threads + )$Q cost }, gr = function(cost, Y) { cost <- cost$update(cost, Y) cost$G <- k2g(Y, 2 * cost$beta * (cost$P - cost$Q), symmetrize = TRUE) cost - } + } ) } # t-ASNE with input kernel bandwidths transferred to output -btasne <- function(perplexity, beta = NULL, eps = .Machine$double.eps, - n_threads = 0, use_cpp = FALSE) { - lreplace(basne(perplexity = perplexity, beta = beta, eps = eps, - n_threads = n_threads, use_cpp = use_cpp), +btasne <- function(perplexity, + beta = NULL, + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + basne( + perplexity = perplexity, + beta = beta, + eps = eps, + n_threads = n_threads, + use_cpp = use_cpp + ), pfn = kl_cost, gr = function(cost, Y) { - cost <- cost_update(cost, Y) - W <- cost$W - cost$G <- k2g(Y, 2 * cost$beta * W * (cost$P - W / cost$Z), - symmetrize = TRUE) - cost + cost <- cost_update(cost, Y) + W <- cost$W + cost$G <- k2g(Y, 2 * cost$beta * W * (cost$P - W / cost$Z), symmetrize = TRUE) + cost }, update = function(cost, Y) { W <- calc_d2(Y, 
use_cpp = use_cpp, n_threads = n_threads) W <- 1 / (1 + cost$beta * W) diag(W) <- 0 - + cost$W <- W cost$Z <- rowSums(W) cost @@ -659,34 +959,50 @@ btasne <- function(perplexity, beta = NULL, eps = .Machine$double.eps, ) } -tasne <- function(perplexity, n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity = perplexity, use_cpp = use_cpp, - n_threads = n_threads), - init = function(cost, X, max_iter, eps = .Machine$double.eps, - verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - cost$G <- k2g(Y, 2 * cost$W * (cost$P - cost$W / cost$Z), - symmetrize = TRUE) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - cost$W <- W - cost$Z <- 1 / rowSums(W) - cost - } +tasne <- function(perplexity, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + use_cpp = use_cpp, + n_threads = n_threads + ), + init = function(cost, + X, + max_iter, + eps = .Machine$double.eps, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + + cost$G <- k2g(Y, 2 * cost$W * (cost$P - cost$W / cost$Z), symmetrize = TRUE) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + + cost$W <- W + cost$Z <- 1 / rowSums(W) + cost + } ) } @@ -694,56 +1010,93 @@ tasne <- function(perplexity, n_threads = 0, use_cpp = FALSE) { # Normalization Experiments ----------------------------------------------- # ASNE but with the t-distributed kernel -tasne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace(tsne(perplexity = perplexity, n_threads = n_threads, - use_cpp = use_cpp), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - cost$eps <- eps - cost - }, - gr = function(cost, Y) { - cost <- cost_update(cost, Y) - - cost$G <- k2g(Y, 2 * cost$W * (cost$P - cost$W / cost$Z), - symmetrize = TRUE) - cost - }, - update = function(cost, Y) { - W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) - W <- 1 / (1 + W) - diag(W) <- 0 - - cost$W <- W - cost$Z <- rowSums(W) - cost - } +tasne <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + tsne( + perplexity = perplexity, + n_threads = n_threads, + use_cpp = use_cpp + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + + cost$G <- k2g(Y, 2 * cost$W * 
(cost$P - cost$W / cost$Z), symmetrize = TRUE) + cost + }, + update = function(cost, Y) { + W <- calc_d2(Y, use_cpp = use_cpp, n_threads = n_threads) + W <- 1 / (1 + W) + diag(W) <- 0 + + cost$W <- W + cost$Z <- rowSums(W) + cost + } ) } # t-RM-SNE # t-SNE without symmetrization of P (but still pair-normalizing) # row-normalize, then matrix normalize -trmsne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { +trmsne <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { lreplace( - tsne(perplexity = perplexity, inp_kernel = inp_kernel, - n_threads = n_threads, use_cpp = use_cpp), - init = function(cost, X, max_iter, verbose = FALSE, ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", normalize = TRUE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) + tsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + n_threads = n_threads, + use_cpp = use_cpp + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) cost$eps <- eps cost }, gr = function(cost, Y) { cost <- cost_update(cost, Y) - + W <- cost$W cost$G <- k2g(Y, 2 * W * (cost$P - W / cost$Z), symmetrize = TRUE) cost @@ -754,62 +1107,106 @@ trmsne <- function(perplexity, inp_kernel = "gaussian", # t-M-SNE # t-SNE but without row-normalizing or symmetrizing, just matrix normalization # Not recommended -tmsne <- function(perplexity, inp_kernel = "gaussian", - eps = .Machine$double.eps, n_threads = 0, use_cpp = FALSE) { - lreplace(trmsne(perplexity = perplexity, inp_kernel = inp_kernel, - eps = eps, n_threads = n_threads, use_cpp = use_cpp), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, kernel = inp_kernel, - symmetrize = "none", row_normalize = FALSE, - normalize = TRUE, verbose = verbose, - ret_extra = ret_extra, n_threads = n_threads, - use_cpp = use_cpp) - cost$eps <- eps - cost - } +tmsne <- function(perplexity, + inp_kernel = "gaussian", + eps = .Machine$double.eps, + n_threads = 0, + use_cpp = FALSE) { + lreplace( + trmsne( + perplexity = perplexity, + inp_kernel = inp_kernel, + eps = eps, + n_threads = n_threads, + use_cpp = use_cpp + ), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "none", + row_normalize = FALSE, + normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + cost$eps <- eps + cost + } ) } # RSR row-normalize, symmetrize, then row-normalize again # Might work a tiny bit better than t-ASNE? 
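A compact sketch of the row-normalize / symmetrize / row-normalize ("RSR") affinity preparation the next two functions apply, assuming a hypothetical raw affinity matrix V:

    set.seed(42)
    V <- matrix(runif(9), nrow = 3)
    diag(V) <- 0
    P <- V / rowSums(V)   # R: row normalization (done inside sne_init)
    P <- 0.5 * (P + t(P)) # S: symmetrization
    P <- P / rowSums(P)   # R: row normalization again, in init below
    rowSums(P)            # each row sums to 1; P is no longer exactly symmetric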
-trsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0, +trsrsne <- function(perplexity, + eps = .Machine$double.eps, + n_threads = 0, use_cpp = FALSE) { - lreplace(tasne(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "symmetric", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - P <- cost$P - P <- P / rowSums(P) - cost$P <- P - - cost$eps <- eps - cost - } + lreplace( + tasne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "symmetric", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + P <- cost$P + P <- P / rowSums(P) + cost$P <- P + + cost$eps <- eps + cost + } ) } -arsrsne <- function(perplexity, eps = .Machine$double.eps, n_threads = 0, +arsrsne <- function(perplexity, + eps = .Machine$double.eps, + n_threads = 0, use_cpp = FALSE) { - lreplace(asne(perplexity, use_cpp = use_cpp, n_threads = n_threads), - init = function(cost, X, max_iter, verbose = FALSE, - ret_extra = c()) { - cost <- sne_init(cost, X, perplexity = perplexity, - symmetrize = "symmetric", normalize = FALSE, - verbose = verbose, ret_extra = ret_extra, - n_threads = n_threads, use_cpp = use_cpp) - P <- cost$P - P <- P / rowSums(P) - cost$P <- P - - cost$eps <- eps - cost - } + lreplace( + asne(perplexity, use_cpp = use_cpp, n_threads = n_threads), + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + symmetrize = "symmetric", + normalize = FALSE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = use_cpp + ) + P <- cost$P + P <- P / rowSums(P) + cost$P <- P + + cost$eps <- eps + cost + } ) } - - From 66d6391cd4563abf6070a7f76bf94a321bca2a39 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 12:45:50 -0700 Subject: [PATCH 19/26] breakup smallvis.R a bit more --- smallvis/R/epoch.R | 49 ++ smallvis/R/input.R | 76 +++ smallvis/R/iterated.R | 293 +++++++++ smallvis/R/smallvis.R | 1303 ++++++++++++++++++++--------------------- 4 files changed, 1060 insertions(+), 661 deletions(-) create mode 100644 smallvis/R/epoch.R create mode 100644 smallvis/R/input.R create mode 100644 smallvis/R/iterated.R diff --git a/smallvis/R/epoch.R b/smallvis/R/epoch.R new file mode 100644 index 0000000..15ac847 --- /dev/null +++ b/smallvis/R/epoch.R @@ -0,0 +1,49 @@ +# Epoch Functions --------------------------------------------------------- + +do_epoch <- function(opt, cost, iter, Y, fn_val) { + if (!is.null(cost$epoch)) { + res <- cost$epoch(opt, cost, iter, Y, fn_val) + if (!is.null(res$opt)) { + opt <- res$opt + } + if (!is.null(res$cost)) { + cost <- res$cost + } + } + + list(opt = opt, cost = cost) +} + +# Helper function for epoch callback, allowing user to supply callbacks with +# multiple arities. 
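Because do_callback() below dispatches on the number of formal arguments, user callbacks only need to accept the arguments they care about. A hypothetical pair of callbacks that would both be accepted (the names are illustrative only):

    cb_plot <- function(Y) { # arity 1: coordinates only
      graphics::plot(Y, pch = 20)
    }
    cb_trace <- function(Y, iter, cost) { # arity 3: also sees iteration and cost
      message("iter ", iter, " cost = ", formatC(cost))
    }
    # do_callback(cb_plot, Y, iter) invokes cb_plot(Y);
    # do_callback(cb_trace, Y, iter, cost) invokes cb_trace(Y, iter, cost)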
+do_callback <- function(cb, + Y, + iter, + cost = NULL, + cost_fn = NULL, + opt = NULL) { + nfs <- length(formals(cb)) + switch(nfs, + "1" = cb(Y), + "2" = cb(Y, iter), + "3" = cb(Y, iter, cost), + "4" = cb(Y, iter, cost, cost_fn), + "5" = cb(Y, iter, cost, cost_fn, opt) + ) +} + +# Create a callback for visualization +make_smallvis_cb <- function(df) { + force(df) + palette <- NULL + function(Y, iter, cost = NULL) { + if (is.null(palette)) { + palette <- vizier:::color_helper(df, color_scheme = grDevices::rainbow)$palette + } + title <- paste0("iter: ", iter) + if (!(is.null(cost) || is.na(cost))) { + title <- paste0(title, " cost = ", formatC(cost)) + } + vizier::embed_plot(Y, df, title = title, color_scheme = palette) + } +} diff --git a/smallvis/R/input.R b/smallvis/R/input.R new file mode 100644 index 0000000..0d2cd21 --- /dev/null +++ b/smallvis/R/input.R @@ -0,0 +1,76 @@ +# Input Preprocess -------------------------------------------------------- + +# Scale X according to various strategies +scale_input <- function(X, scale, verbose = FALSE) { + if (is.null(scale)) { + scale <- "none" + } + if (is.logical(scale)) { + if (scale) { + scale <- "scale" + } else { + scale <- "none" + } + } + scale <- match.arg(tolower(scale), c("none", "scale", "range", "absmax")) + + switch(scale, + range = { + tsmessage("Range scaling X") + X <- as.matrix(X) + X <- X - min(X) + X <- X / max(X) + }, + absmax = { + tsmessage("Normalizing by abs-max") + X <- base::scale(X, scale = FALSE) + X <- X / abs(max(X)) + }, + scale = { + tsmessage("Scaling to zero mean and unit variance") + X <- Filter(stats::var, X) + tsmessage("Kept ", ncol(X), " non-zero-variance columns") + X <- base::scale(X, scale = TRUE) + }, + none = { + X <- as.matrix(X) + } + ) + X +} + +# Reduce input dimensionality via PCA and also optionally whiten data +pca_preprocess <- function(X, pca, whiten, initial_dims, verbose = FALSE) { + # We won't do PCA if the rank of the input is less than the requested + # initial dimensionality + if (pca) { + pca <- min(nrow(X), ncol(X)) >= initial_dims + } + if (pca) { + if (whiten) { + tsmessage( + "Reducing initial dimensionality with PCA and ", + "whitening to ", + initial_dims, + " dims" + ) + X <- pca_whiten( + X = X, + ncol = initial_dims, + verbose = verbose + ) + } else { + tsmessage( + "Reducing initial dimensionality with PCA to ", + initial_dims, + " dims" + ) + X <- pca_scores( + X = X, + ncol = initial_dims, + verbose = verbose + ) + } + } + X +} diff --git a/smallvis/R/iterated.R b/smallvis/R/iterated.R new file mode 100644 index 0000000..ab1f7d0 --- /dev/null +++ b/smallvis/R/iterated.R @@ -0,0 +1,293 @@ +#' Best t-SNE Result From Multiple Initializations +#' +#' Run t-SNE multiple times from a random initialization, and return the +#' embedding with the lowest cost. +#' +#' This function ignores any value of \code{Y_init} you set, and uses +#' \code{Y_init = "rand"}. +#' +#' @param nrep Number of repeats. +#' @param keep_all If \code{TRUE}, then the return value is a list of lists, +#' indexed from 1 .. \code{nrep}, with each entry the result from each +#' \code{\link{smallvis}} run. Otherwise just the result with the lowest error +#' is returned. +#' @param ... Arguments to apply to each \code{\link{smallvis}} run. +#' @return The \code{\link{smallvis}} result with the lowest final cost, or +#' if \code{keep_all} is \code{TRUE} all results as a list, indexed as 1 .. +#' \code{nrep}. 
If \code{ret_extra} is not \code{FALSE}, then the final costs for all +#' \code{nrep} runs are also included in the return value list as a vector +#' called \code{all_costs}. In this case, if \code{keep_all} is \code{TRUE}, then +#' \code{all_costs} appears as an extra item on all results. Additionally, +#' each result will have an extra entry \code{best_rep}, giving the index of the +#' result with the lowest cost. +#' @examples +#' \dontrun{ +#' # Return best result out of five random initializations +#' tsne_iris_best <- smallvis_rep( +#' nrep = 5, X = iris, perplexity = 50, method = "tsne", +#' ret_extra = TRUE +#' ) +#' # How much do the costs vary between runs? +#' range(tsne_iris_best$all_costs) +#' # Display best embedding found +#' plot(tsne_iris_best$Y) +#' +#' # Keep all results +#' # First result is in tsne_iris_rep[[1]], second in tsne_iris_rep[[2]] etc. +#' tsne_iris_rep <- smallvis_rep( +#' nrep = 5, X = iris, perplexity = 50, method = "tsne", +#' ret_extra = TRUE, keep_all = TRUE +#' ) +#' # Index of result with smallest error is in special list item 'best_rep' +#' best_iris <- tsne_iris_rep[[tsne_iris_rep[[1]]$best_rep]] +#' } +#' @export +smallvis_rep <- function(nrep = 10, + keep_all = FALSE, + ...) { + if (nrep < 1) { + stop("nrep must be 1 or greater") + } + varargs <- list(...) + best_res <- NULL + best_cost <- Inf + all_costs <- c() + # Keep requested return type for final result + ret_extra <- varargs$ret_extra + + # always return extra so we can find the cost + if (!should_ret_extra(ret_extra)) { + varargs$ret_extra <- TRUE + } + + varargs$Y_init <- "rand" + ret <- list() + + for (i in 1:nrep) { + # If verbose is not explicitly set to FALSE, it's TRUE by default + if (nnat(varargs$verbose) || is.null(varargs$verbose)) { + tsmessage("Starting embedding # ", i, " of ", nrep) + } + res <- do.call(smallvis, varargs) + + if (keep_all) { + ret[[i]] <- res + } + + final_cost <- res$itercosts[length(res$itercosts)] + names(final_cost) <- NULL + all_costs <- c(all_costs, final_cost) + + if (!keep_all) { + if (final_cost < best_cost) { + best_cost <- final_cost + best_res <- res + } + } + } + + if (keep_all) { + # if keep_all is TRUE and we asked for extra return info + # also add the final costs and the index of the best result to each result + if (should_ret_extra(ret_extra)) { + best_rep <- which.min(all_costs) + for (i in 1:nrep) { + ret[[i]]$all_costs <- all_costs + ret[[i]]$best_rep <- best_rep + } + } + # otherwise just store the Y-coordinates of each result + else { + for (i in 1:nrep) { + ret[[i]] <- ret[[i]]$Y + } + } + } else { + # Only keeping one result + if (should_ret_extra(ret_extra)) { + # store info about other results on best result list + best_res$all_costs <- all_costs + ret <- best_res + } else { + ret <- best_res$Y + } + } + ret +} + +# If ret_extra is NULL or FALSE, we aren't returning extra info +# If ret_extra is TRUE or a vector, we are returning extra info +should_ret_extra <- function(ret_extra) { + !is.null(ret_extra) && + ((methods::is(ret_extra, "logical") && ret_extra) || + methods::is(ret_extra, "character")) +} + +#' Dimensionality Reduction With Perplexity Stepping +#' +#' Carry out dimensionality reduction of a (small) dataset using one of a +#' variety of neighbor embedding methods, using a decreasing value of +#' perplexity to avoid bad local minima. 
+#'
+#' This function uses ideas similar to those in the NeRV (Venna et al., 2010)
+#' and JSE (Lee et al., 2013) methods, where, to avoid local minima, the
+#' initial optimization steps use affinities with larger bandwidths (NeRV) or
+#' larger perplexity values (JSE). This implementation uses a series of
+#' decreasing perplexity values, as in JSE.
+#'
+#' For details on the arguments that can be passed to the dimensionality
+#' reduction routine, see the help text for \code{\link{smallvis}}.
+#'
+#' To avoid spending too much extra time in perplexity calibrations, the extra
+#' perplexities start at the power of 2 closest to, but not greater than,
+#' half the dataset size (in terms of number of objects). Further calibrations
+#' are then carried out halving the perplexity each time, until the perplexity
+#' specified by the user is reached. For example, for a dataset of 1500
+#' objects and a target perplexity of 40, the stepped perplexities would be
+#' 512, 256, 128 and 64.
+#'
+#' The number of iterations spent at the larger perplexity values is specified
+#' by the \code{step_iter} parameter. This total is divided evenly between the
+#' extra perplexities, e.g. if \code{step_iter = 250} and extra optimizations
+#' at perplexities of 1024, 512, 256, 128 and 64 are carried out, each will
+#' run for 50 iterations. To keep the number of iterations equivalent to
+#' that used by a single run of \code{\link{smallvis}}, the value of
+#' \code{step_iter} is subtracted from the value of \code{max_iter} before
+#' the optimization at the target perplexity is carried out, e.g. if
+#' \code{max_iter = 1000} and \code{step_iter = 250}, the final
+#' optimization will run for 750 iterations only.
+#'
+#' Any value of \code{tol}, \code{exaggeration_factor} and
+#' \code{stop_lying_iter} provided is used only with the final optimization.
+#'
+#' @param step_iter Number of iterations to carry out the perplexity
+#' stepping. Must be < the value of \code{max_iter}.
+#' @param ... Arguments to be passed to \code{\link{smallvis}}. See 'Details'
+#' for information on which arguments may be modified or ignored during certain
+#' parts of the embedding.
+#' @return The result of the final run of \code{\link{smallvis}} at the target
+#' perplexity.
+#' @examples
+#' \dontrun{
+#' # t-SNE on the iris data with L-BFGS optimization
+#' # The 1000 max_iter is split between 250 iterations at perplexity = 64
+#' # and then 750 iterations at perplexity = 40.
+#' iris_lbfgs_pstep <- smallvis_perpstep(
+#'   step_iter = 250, X = iris, scale = FALSE, verbose = TRUE, Y_init = "spca",
+#'   ret_extra = c("DX", "DY"), perplexity = 40, max_iter = 1000, opt = list("l-bfgs")
+#' )
+#' }
+#' @export
+#' @references
+#' Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010).
+#' Information retrieval perspective to nonlinear dimensionality reduction for
+#' data visualization.
+#' \emph{Journal of Machine Learning Research}, \emph{11}, 451-490.
+#'
+#' Lee, J. A., Renard, E., Bernard, G., Dupont, P., & Verleysen, M. (2013).
+#' Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions in
+#' dimensionality reduction based on similarity preservation.
+#' \emph{Neurocomputing}, \emph{112}, 92-108.
+smallvis_perpstep <- function(step_iter = 250, ...) {
+  varargs <- list(...)
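+  # Illustrative numbers only: with max_iter = 1000, step_iter = 250 and
+  # perplexity = 40 on 1500 observations, scale_perps() below yields the
+  # stepped perplexities 512, 256, 128 and 64, each optimized for
+  # floor(250 / 4) = 62 iterations, before 1000 - 250 = 750 iterations at
+  # the target perplexity of 40.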
+  max_iter <- varargs$max_iter
+  if (is.null(max_iter)) {
+    max_iter <- 1000
+  }
+  if (max_iter <= step_iter) {
+    stop("max_iter must be > step_iter")
+  }
+
+  target_perplexity <- varargs$perplexity
+  if (is.null(target_perplexity)) {
+    target_perplexity <- 30
+  }
+
+  X <- varargs$X
+  if (methods::is(X, "dist")) {
+    n <- attr(X, "Size")
+  } else {
+    n <- nrow(X)
+  }
+  perps <- scale_perps(n = n, target_perp = target_perplexity)
+
+  nperps <- length(perps)
+  # With no stepping perplexities, the final run gets the full max_iter
+  max_iter_target <- max_iter
+  if (nperps > 0) {
+    max_iter_step <- max(1, floor(step_iter / nperps))
+    max_iter_target <- max(1, max_iter - step_iter)
+
+    # Save/Modify some options between step iterations and final optimization
+    ret_extra <- varargs$ret_extra
+    varargs$ret_extra <- FALSE
+    varargs$max_iter <- max_iter_step
+    tol <- varargs$tol
+    varargs$tol <- 0
+    exaggeration_factor <- varargs$exaggeration_factor
+    varargs$exaggeration_factor <- 1
+
+    epoch <- varargs$epoch
+    varargs$epoch <- max_iter_step
+
+    # Loop over initial perplexities
+    res <- NULL
+    for (i in 1:nperps) {
+      if (nnat(varargs$verbose)) {
+        tsmessage(
+          "Optimizing at step perplexity ",
+          formatC(perps[i]),
+          " for ",
+          max_iter_step,
+          " iterations"
+        )
+      }
+      varargs$perplexity <- perps[i]
+      if (i > 1) {
+        varargs$Y_init <- res
+      }
+      res <- do.call(smallvis, varargs)
+    }
+
+    varargs$Y_init <- res
+    varargs$max_iter <- max_iter_target
+    # Put the old arguments back before final optimization
+    varargs$perplexity <- target_perplexity
+    if (!is.null(ret_extra)) {
+      varargs$ret_extra <- ret_extra
+    }
+    if (!is.null(tol)) {
+      varargs$tol <- tol
+    }
+    if (!is.null(epoch)) {
+      varargs$epoch <- epoch
+    }
+    if (!is.null(exaggeration_factor)) {
+      varargs$exaggeration_factor <- exaggeration_factor
+    }
+  }
+
+  if (nnat(varargs$verbose)) {
+    tsmessage(
+      "Optimizing at target perplexity ",
+      formatC(target_perplexity),
+      " for ",
+      max_iter_target,
+      " iterations"
+    )
+  }
+  do.call(smallvis, varargs)
+}
+
+# Utility function for perplexity step
+scale_perps <- function(n, target_perp) {
+  max_perp <- n / 2
+  max_perp <- 2^floor(log(max_perp, 2))
+  perp <- max_perp
+
+  # Initialize here so an empty vector (not an undefined object) is returned
+  # when target_perp is already >= the largest step perplexity
+  perps <- c()
+  if (max_perp > target_perp) {
+    while (perp > target_perp) {
+      perps <- c(perps, perp)
+      perp <- perp / 2
+    }
+  }
+
+  perps
+}
diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R
index 4a3989f..241e7a0 100644
--- a/smallvis/R/smallvis.R
+++ b/smallvis/R/smallvis.R
@@ -47,17 +47,17 @@
#' co-workers, use method \code{"bnerv"}.
#' \item \code{"jse"} The Jensen-Shannon Embedding method of Lee and
#' co-workers (2013).
-#' \item \code{"absne"} The alpha-beta SNE method of Narayan and co-workers
+#' \item \code{"absne"} The alpha-beta SNE method of Narayan and co-workers
#' (2015).
-#' \item \code{"chsne"} The chi-squared divergence version of t-SNE
+#' \item \code{"chsne"} The chi-squared divergence version of t-SNE
#' (Im and co-workers, 2018).
-#' \item \code{"hlsne"} The Hellinger distance divergence version of t-SNE
+#' \item \code{"hlsne"} The Hellinger distance divergence version of t-SNE
#' (Im and co-workers, 2018).
#' \item \code{"rklsne"} The reverse Kullback-Leibler divergence version of
#' t-SNE (Im and co-workers, 2018).
#' \item \code{"jssne"} The Jensen-Shannon divergence version of t-SNE (Im and
#' co-workers, 2018).
-#' \item \code{"gsne"}, The global SNE (g-SNE) method of Zhou and Sharpee
+#' \item \code{"gsne"} The global SNE (g-SNE) method of Zhou and Sharpee
#' (2018).
#' }
#'
@@ -73,9 +73,9 @@
#' used. To control these, instead of passing a name to the \code{method}
#' parameter, pass a list. The first element is the name of the method you wish
#' to use. Subsequent elements must be named values specifying the parameters.
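#' For example (an illustrative call, not taken from the original docs):
#' \code{smallvis(X, method = list("largevis", gamma = 7, gr_eps = 0.1))}
#' overrides the LargeVis-specific parameters described below.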
-#'
+#'
#' Some parameters are available for all (or nearly all) methods.
-#'
+#'
#' \itemize{
#' \item{\code{inp_kernel}} the input kernel function. Can be one of:
#' \code{"gauss"} (the default), \code{"exp"} or \code{"knn"}.
@@ -88,9 +88,9 @@
#' sparsification is carried out with this kernel, so there are no
#' memory or performance improvements to be had with this setting.
#' \code{"skd"} uses the smooth knn distances method as used by UMAP.
-#' \item{\code{symmetrize}} the type of symmetrization, used by symmetric
+#' \item{\code{symmetrize}} the type of symmetrization, used by symmetric
#' methods only. Can be one of:
-#' \code{"symmetric"} symmetric nearest neighbor style, by arithmetic
+#' \code{"symmetric"} symmetric nearest neighbor style, by arithmetic
#' averaging, as in t-SNE.
#' \code{"fuzzy"} symmetrization by fuzzy set union as used in UMAP.
#' \code{"mutual"} mutual nearest neighbor style as suggested by Schubert
@@ -101,10 +101,10 @@
#' \item \code{"LargeVis"}
#' \itemize{
#' \item{\code{gamma}} Weighting term for the repulsive versus attractive
-#' forces. Default is \code{1}. The implementation by the creators of
-#' LargeVis uses a default \code{gamma = 7}, but note that this is for
-#' stochastic gradient descent with limited sampling of the repulsive
-#' contributions so it's unlikely to be a good choice with the
+#' forces. Default is \code{1}. The implementation by the creators of
+#' LargeVis uses a default \code{gamma = 7}, but note that this is for
+#' stochastic gradient descent with limited sampling of the repulsive
+#' contributions, so it's unlikely to be a good choice with the
#' implementation used in this package.
#' \item{\code{gr_eps}} Epsilon used in the gradient to prevent
#' division by zero. Default is \code{0.1}.
@@ -163,9 +163,9 @@
#' }
#' \item \code{"ABSNE"}
#' \itemize{
-#' \item{\code{alpha}} Alpha value for the alpha-beta divergence. Set
+#' \item{\code{alpha}} Alpha value for the alpha-beta divergence. Set
#' \code{alpha < 1} to produce more, smaller, finer-grained clusters, and
-#' \code{alpha > 1} to produce fewer, larger clusters, with more emphasis
+#' \code{alpha > 1} to produce fewer, larger clusters, with more emphasis
#' on global structure. Default is \code{1.0}, to give t-SNE-like behavior.
#' \item{\code{lambda}} Sum of alpha + beta, where beta is the beta value
#' for the alpha-beta divergence. Set \code{lambda < 1} to increase cluster
@@ -174,8 +174,8 @@
#' }
#' \item \code{"gsne"}
#' \itemize{
-#' \item{\code{lambda}} Weighting factor to put increasing emphasis on
-#' preserving global similarities. Set to \code{0} to get t-SNE (no
+#' \item{\code{lambda}} Weighting factor to put increasing emphasis on
+#' preserving global similarities. Set to \code{0} to get t-SNE (no
#' extra emphasis on global structure), and to \code{1.0} to get equal
#' weighting between the local and global divergences. Default is \code{1.0}.
#' }
@@ -193,7 +193,7 @@
#' \item{A matrix}: which must have dimensions \code{n} by \code{k}, where
#' \code{n} is the number of rows in \code{X}.
#' \item{\code{"rand"}}: initialize from a Gaussian distribution with mean 0
-#' and standard deviation 1e-4, the default used by t-SNE. The standard
+#' and standard deviation 1e-4, the default used by t-SNE. The standard
#' deviation can be controlled with \code{Y_init_sdev} (see below).
#' \item{\code{"pca"}}: use the first \code{k} scores of the
#' PCA: columns are centered, but no scaling beyond that which is applied by
@@ -229,15 +229,15 @@
#' related to t-SNE (see Carreira-Perpinan, 2010, and Linderman and
#' Steinerberger, 2017): it may therefore be unnecessary to use the
#' \code{exaggeration_factor} setting.
-#'
+#'
#' The \code{Y_init_sdev} parameter, if provided, will scale the input
#' coordinates such that the standard deviation of each dimension is the
-#' provided value. The default is to do no scaling, except for
-#' \code{Y_init = "spca"} and \code{Y_init = "rand"} where a scaling to a
+#' provided value. The default is to do no scaling, except for
+#' \code{Y_init = "spca"} and \code{Y_init = "rand"} where a scaling to a
#' standard deviation of \code{1e-4} is used, as in t-SNE initialization.
#' \code{Y_init = "spca"} is effectively an alias for \code{Y_init = "pca",
#' Y_init_sdev = 1e-4}.
-#'
+#'
#' Depending on the embedding method, a particular initialization method may
#' result in initial coordinates with too small or large inter-point distances,
#' which can result in too large or small gradients, respectively. In turn this
@@ -282,21 +282,21 @@
#' \url{https://jlmelville.github.io/smallvis/idp.html}.
#'
#' @section Multiscale perplexities:
-#'
+#'
#' Another technique to combine multiple perplexities is to use the multiscale
-#' approach given by de Bodt and co-workers (2018). As with IDP, a series of
+#' approach given by de Bodt and co-workers (2018). As with IDP, a series of
#' candidate perplexities are used, but all the affinity matrices are used to
#' create an average matrix which is used as the final probability matrix.
-#' To use this method, set \code{perplexity = "multiscale"}. Default and custom
-#' list of perplexities to use can be provided in the same way as with IDP.
-#'
+#' To use this method, set \code{perplexity = "multiscale"}. Default and custom
+#' lists of perplexities to use can be provided in the same way as with IDP.
+#'
#' Note that previous work by this group described a slightly more complex
#' approach where the number of individual perplexity results are introduced
#' into the average sequentially over the course of the optimization, and
-#' the output probabilities are also generated by an averaging. Although also
+#' the output probabilities are also generated by an averaging. Although also
#' referred to as "multiscale", these variations are not implemented. Also,
-#' if \code{ret_extra = TRUE} is used, extra data associated with a specific
-#' perplexity (e.g. degree centrality, intrinsic dimensionality) will not
+#' if \code{ret_extra = TRUE} is used, extra data associated with a specific
+#' perplexity (e.g. degree centrality, intrinsic dimensionality) will not
#' be returned.
#'
#' @section Alternative optimizers:
@@ -419,7 +419,7 @@
#' @param perplexity The target perplexity for parameterizing the input
#' probabilities. For method \code{"umap"}, controls the neighborhood size
#' for parameterizing the smoothed k-nearest neighbor distances. See also the
-#' 'Intrinsic dimensionality perplexity' and 'Multiscale perplexities'
+#' 'Intrinsic dimensionality perplexity' and 'Multiscale perplexities'
#' sections.
#' @param max_iter Maximum number of iterations in the optimization.
#' @param pca If \code{TRUE}, apply PCA to reduce the dimensionality of
@@ -457,22 +457,22 @@
#' @param epoch After every \code{epoch} number of steps, calculates and
#' displays the cost value and calls \code{epoch_callback}, if supplied.
#' @param momentum Initial momentum value.
-#' @param final_momentum Final momentum value. If
+#' @param final_momentum Final momentum value. If
#' \code{late_exaggeration_factor > 1}, then during late exaggeration, the
#' momentum is switched back to \code{momentum} from this value.
#' @param mom_switch_iter Iteration at which the momentum will switch from
-#' \code{momentum} to \code{final_momentum}. If
+#' \code{momentum} to \code{final_momentum}. If
#' \code{exaggeration_factor > 1}, then this should occur at some point
#' after \code{stop_lying_iter} (default is 150 iterations after). If
#' the early exaggeration phase stops early, this value is treated as being
#' relative to when early exaggeration stops, to avoid wasting iterations
#' at the lower momentum value. For example, if \code{stop_lying_iter = 100}
#' and \code{mom_switch_iter = 250} (the defaults), but early exaggeration
-#' converges at iteration 50, the switch iteration will occur at iteration
+#' converges at iteration 50, the switch will occur at iteration
#' 150.
#' @param eta Learning rate value, a positive number. Or set to \code{"optsne"},
#' to use the formula suggested by Belkina and co-workers (2018) in their
-#' opt-SNE package (the size of the dataset divided by the
+#' opt-SNE package (the size of the dataset divided by the
#' \code{exaggeration_factor}).
#' @param min_gain Minimum gradient descent step size.
#' @param opt Optional list specifying alternative minimization method. See
@@ -510,8 +510,8 @@
#' number of iterations before monitoring the relative rate of change of the
#' cost function during early exaggeration.
#' @param ee_mon_buffer If \code{ee_mon_epoch} is non-\code{NULL}, then ignore
-#' this number of occurences of the relative rate of change of the cost
-#' function decreasing, which would otherwise signal termination of
+#' this number of occurrences of the relative rate of change of the cost
+#' function decreasing, which would otherwise signal termination of
#' the early exaggeration stage. This is to prevent erroneous termination of
#' early exaggeration under conditions when the cost can fluctuate noisily.
#' @param tol_wait Wait this number of iterations during standard optimization
@@ -525,7 +525,7 @@
#' \code{Value} section for details.
#' @param n_threads Number of threads to use in multi-threaded code. Default is
#' 0, which means no multi-threading. Mainly affects the calculation of things
-#' like distance matrices and perplexity calibration if you set
+#' like distance matrices and perplexity calibration if you set
#' \code{use_cpp = TRUE}. Otherwise, only methods that need to calculate
#' nearest neighbors will be affected.
#' @param use_cpp If \code{TRUE}, use multi-threaded C++ code for some
@@ -722,14 +722,14 @@
#' (pp. 585-591).
#' \url{http://papers.nips.cc/paper/1961-laplacian-eigenmaps-and-spectral-techniques-for-embedding-and-clustering.pdf}
#'
-#' Belkina, A. C., Ciccolella, C. O., Anno, R., Spidlen, J., Halpert, R., & Snyder-Cappione, J. (2018).
-#' Automated optimal parameters for T-distributed stochastic neighbor embedding improve visualization and allow analysis of large datasets.
+#' Belkina, A. C., Ciccolella, C. O., Anno, R., Spidlen, J., Halpert, R., & Snyder-Cappione, J. (2018).
+#' Automated optimal parameters for T-distributed stochastic neighbor embedding improve visualization and allow analysis of large datasets.
+#' \emph{bioRxiv}, 451690.
#' \url{https://www.biorxiv.org/content/10.1101/451690v2.abstract} #' -#' De Bodt, C., Mulders, D., Verleysen, M., & Lee, J. A. (2018). -#' Perplexity-free t-SNE and twice Student tt-SNE. -#' In \emph{European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2018)} (pp. 123-128). +#' De Bodt, C., Mulders, D., Verleysen, M., & Lee, J. A. (2018). +#' Perplexity-free t-SNE and twice Student tt-SNE. +#' In \emph{European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2018)} (pp. 123-128). #' \url{http://hdl.handle.net/2078.1/200844} #' #' Borg, I., & Groenen, P. J. (2005). @@ -748,13 +748,13 @@ #' Stochastic neighbor embedding. #' In \emph{Advances in neural information processing systems} (pp. 833-840). #' -#' Im, D. J., Verma, N., & Branson, K. (2018). -#' Stochastic Neighbor Embedding under f-divergences. +#' Im, D. J., Verma, N., & Branson, K. (2018). +#' Stochastic Neighbor Embedding under f-divergences. #' \emph{arXiv preprint} \emph{arXiv}:1811.01247. #' \url{https://arxiv.org/abs/1811.01247} #' #' Kobak, D., & Berens, P. (2018). -#' The art of using t-SNE for single-cell transcriptomics. +#' The art of using t-SNE for single-cell transcriptomics. #' \emph{bioRxiv}, 453449. #' \url{https://doi.org/10.1101/453449} #' @@ -778,13 +778,13 @@ #' \emph{arXiv preprint} \emph{arXiv}:1802.03426. #' \url{https://arxiv.org/abs/1802.03426} #' -#' Narayan, K. S., Punjani, A., & Abbeel, P. (2015, June). -#' Alpha-Beta Divergences Discover Micro and Macro Structures in Data. +#' Narayan, K. S., Punjani, A., & Abbeel, P. (2015, June). +#' Alpha-Beta Divergences Discover Micro and Macro Structures in Data. #' In \emph{Proceedings of the 32nd International Conference on Machine Learning (ICML-14)} #' (pp 796-804). #' \url{http://proceedings.mlr.press/v37/narayan15.html} #' -#' Schubert, E., & Gertz, M. (2017, October). +#' Schubert, E., & Gertz, M. (2017, October). #' Intrinsic t-stochastic neighbor embedding for visualization and outlier detection. #' In \emph{International Conference on Similarity Search and Applications} #' (pp. 188-203). Springer, Cham. @@ -832,39 +832,47 @@ #' In \emph{Proceedings of the 18th International Conference on Artificial Intelligence and Statistics (AISTATS 2015)} #' (pp. 1088-1097). #' -#' Zhou, Y., & Sharpee, T. (2018). -#' Using global t-SNE to preserve inter-cluster data structure. -#' \emph{bioRxiv}, 331611. +#' Zhou, Y., & Sharpee, T. (2018). +#' Using global t-SNE to preserve inter-cluster data structure. +#' \emph{bioRxiv}, 331611. 
#' \url{https://doi.org/10.1101/331611} #' #' @export -smallvis <- function(X, k = 2, scale = "absmax", - Y_init = "rand", Y_init_sdev = NULL, - perplexity = 30, max_iter = 1000, - pca = FALSE, initial_dims = 50, - method = "tsne", - epoch_callback = TRUE, - epoch = max(1, base::round(max_iter / 10)), - min_cost = 0, tol = 1e-7, g2tol = NULL, - momentum = 0.5, final_momentum = 0.8, - mom_switch_iter = stop_lying_iter + 150, - eta = 500, min_gain = 0.01, - opt = list("dbd"), - exaggeration_factor = 1, - stop_lying_iter = max(1, floor(max_iter / 10)), - late_exaggeration_factor = 1, - start_late_lying_iter = max_iter - max(1, floor(max_iter / 10)), - iter0_cost = FALSE, - ee_mon_epoch = NULL, - ee_mon_wait = 15, - ee_mon_buffer = 2, - tol_wait = 15, - ret_extra = FALSE, - n_threads = 0, - use_cpp = FALSE, - eps = .Machine$double.eps, - verbose = TRUE) { - +smallvis <- function(X, + k = 2, + scale = "absmax", + Y_init = "rand", + Y_init_sdev = NULL, + perplexity = 30, + max_iter = 1000, + pca = FALSE, + initial_dims = 50, + method = "tsne", + epoch_callback = TRUE, + epoch = max(1, base::round(max_iter / 10)), + min_cost = 0, + tol = 1e-7, + g2tol = NULL, + momentum = 0.5, + final_momentum = 0.8, + mom_switch_iter = stop_lying_iter + 150, + eta = 500, + min_gain = 0.01, + opt = list("dbd"), + exaggeration_factor = 1, + stop_lying_iter = max(1, floor(max_iter / 10)), + late_exaggeration_factor = 1, + start_late_lying_iter = max_iter - max(1, floor(max_iter / 10)), + iter0_cost = FALSE, + ee_mon_epoch = NULL, + ee_mon_wait = 15, + ee_mon_buffer = 2, + tol_wait = 15, + ret_extra = FALSE, + n_threads = 0, + use_cpp = FALSE, + eps = .Machine$double.eps, + verbose = TRUE) { if (is.logical(epoch_callback)) { if (epoch_callback) { epoch_callback <- make_smallvis_cb(X) @@ -876,73 +884,347 @@ smallvis <- function(X, k = 2, scale = "absmax", else if (is.function(epoch_callback)) { force(epoch_callback) } - + # The embedding method - method_names <- c("tsne", "largevis", "umap", "tumap", - "ntumap", "mmds", "gmmds", - "asne", "ssne", "wtsne", "wssne", - "hssne", "ee", "nerv", "snerv", "jse", "sjse", - "smmds", "sammon", - "tasne", "trmsne", "trsrsne", "tmsne", "arsrsne", - "rsrjse", "rsrnerv", - "btsne", "bssne", "basne", "btasne", "bnerv", - "ballmmds", "knnmmds", - "dhssne", "pstsne", "tsneu", - "skdtsne", "usne", "cetsne", - "tee", "absne", "chsne", "hlsne", "rklsne", "jssne", - "gsne", "abssne", "bhssne") + method_names <- c( + "tsne", + "largevis", + "umap", + "tumap", + "ntumap", + "mmds", + "gmmds", + "asne", + "ssne", + "wtsne", + "wssne", + "hssne", + "ee", + "nerv", + "snerv", + "jse", + "sjse", + "smmds", + "sammon", + "tasne", + "trmsne", + "trsrsne", + "tmsne", + "arsrsne", + "rsrjse", + "rsrnerv", + "btsne", + "bssne", + "basne", + "btasne", + "bnerv", + "ballmmds", + "knnmmds", + "dhssne", + "pstsne", + "tsneu", + "skdtsne", + "usne", + "cetsne", + "tee", + "absne", + "chsne", + "hlsne", + "rklsne", + "jssne", + "gsne", + "abssne", + "bhssne" + ) if (is.character(method)) { method <- match.arg(tolower(method), method_names) - cost_fn <- switch(method, - tsne = tsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - umap = umap(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - largevis = largevis(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - tumap = tumap(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - ntumap = ntumap(perplexity = perplexity, n_threads = 
n_threads, eps = eps, use_cpp = use_cpp), - mmds = mmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), - gmmds = gmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - asne = asne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - ssne = ssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - wtsne = wtsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - wssne = wssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - hssne = hssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - ee = ee(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - nerv = nerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - snerv = snerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - jse = jse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - sjse = sjse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - smmds = smmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), - sammon = sammon(n_threads = n_threads, eps = eps, use_cpp = use_cpp), - tasne = tasne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - trmsne = trmsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - trsrsne = trsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - tmsne = tmsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - arsrsne = arsrsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - rsrjse = rsrjse(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - rsrnerv = rsrnerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - btsne = btsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - bssne = bssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - basne = basne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - btasne = btasne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - bnerv = bnerv(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - ballmmds = ballmmds(n_threads = n_threads, eps = eps, use_cpp = use_cpp), - knnmmds = knnmmds(k = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - dhssne = dhssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - tsneu = tsneu(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - pstsne = pstsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - skdtsne = skdtsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - usne = usne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - cetsne = cetsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - tee = tee(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - absne = absne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - chsne = chsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - hlsne = hlsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - gsne 
= gsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - rklsne = rklsne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - jssne = jssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - abssne = abssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - bhssne = bhssne(perplexity = perplexity, n_threads = n_threads, eps = eps, use_cpp = use_cpp), - stop("BUG: someone forgot to implement option: '", method, "'") + cost_fn <- switch( + method, + tsne = tsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + umap = umap( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + largevis = largevis( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tumap = tumap( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ntumap = ntumap( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + mmds = mmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + gmmds = gmmds( + k = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + asne = asne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ssne = ssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + wtsne = wtsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + wssne = wssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + hssne = hssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ee = ee( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + nerv = nerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + snerv = snerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + jse = jse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + sjse = sjse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + smmds = smmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + sammon = sammon( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tasne = tasne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + trmsne = trmsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + trsrsne = trsrsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tmsne = tmsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + arsrsne = arsrsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rsrjse = rsrjse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rsrnerv = rsrnerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + btsne = btsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bssne = bssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + basne = basne( + perplexity = perplexity, + n_threads = n_threads, + eps = 
eps, + use_cpp = use_cpp + ), + btasne = btasne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bnerv = bnerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ballmmds = ballmmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + knnmmds = knnmmds( + k = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + dhssne = dhssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tsneu = tsneu( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + pstsne = pstsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + skdtsne = skdtsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + usne = usne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + cetsne = cetsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tee = tee( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + absne = absne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + chsne = chsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + hlsne = hlsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + gsne = gsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rklsne = rklsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + jssne = jssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + abssne = abssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bhssne = bhssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + stop("BUG: someone forgot to implement option: '", method, "'") ) } else { @@ -965,7 +1247,7 @@ smallvis <- function(X, k = 2, scale = "absmax", else { stop("Bad method parameter type") } - + param_names <- names(formals(fn)) if ("perplexity" %in% param_names) { methodlist$perplexity <- perplexity @@ -975,25 +1257,25 @@ smallvis <- function(X, k = 2, scale = "absmax", } cost_fn <- do.call(fn, methodlist) } - + if (exaggeration_factor != 1) { if (stop_lying_iter < 1) { stop("stop_lying_iter must be >= 1") } } - + if (late_exaggeration_factor != 1) { if (start_late_lying_iter < 1) { stop("start_late_lying_iter must be >= 1") } } - + if (exaggeration_factor != 1 && late_exaggeration_factor != 1) { if (start_late_lying_iter < stop_lying_iter) { stop("start_late_lying_iter must be >= stop_lying_iter") } } - + if (methods::is(pca, "character") && pca == "whiten") { pca <- TRUE whiten <- TRUE @@ -1005,18 +1287,18 @@ smallvis <- function(X, k = 2, scale = "absmax", stop("Initial PCA dimensionality must be larger than desired output ", "dimension") } - + start_time <- NULL ret_optionals <- c() if (methods::is(ret_extra, "character")) { ret_optionals <- ret_extra ret_extra <- TRUE } - + if (ret_extra) { start_time <- Sys.time() } - + if (methods::is(X, "dist")) { n <- attr(X, "Size") } @@ -1029,45 +1311,56 @@ smallvis <- function(X, k = 2, scale = "absmax", } X <- X[, indexes] } - + X <- scale_input(X, scale, verbose = verbose) X <- pca_preprocess(X, pca, whiten, initial_dims, verbose = 
verbose) n <- nrow(X) } - + # Check for NA if (any(is.na(X))) { stop("Input data contains NA: missing data is not allowed") } - + # Fail early as possible if matrix initializer is invalid if (methods::is(Y_init, "matrix")) { if (nrow(Y_init) != n || ncol(Y_init) != k) { stop("Y_init matrix does not match necessary configuration for X") } } - + # Optimizer if (opt[[1]] == "dbd" || opt[[1]] == "ndbd") { if (eta == "optsne") { eta <- n / exaggeration_factor tsmessage("Using opt-SNE learning rate = ", formatC(eta)) } - opt_list <- lmerge(opt, list(momentum = momentum, - final_momentum = final_momentum, - mom_switch_iter = mom_switch_iter, - eta = eta, min_gain = min_gain, - verbose = verbose)) + opt_list <- lmerge( + opt, + list( + momentum = momentum, + final_momentum = final_momentum, + mom_switch_iter = mom_switch_iter, + eta = eta, + min_gain = min_gain, + verbose = verbose + ) + ) } else { opt_list <- opt } opt <- opt_create(opt_list, verbose = verbose) - + # Initialize the cost function and create P - cost_fn <- cost_init(cost_fn, X, max_iter = max_iter, verbose = verbose, - ret_extra = ret_optionals) - + cost_fn <- cost_init( + cost_fn, + X, + max_iter = max_iter, + verbose = verbose, + ret_extra = ret_optionals + ) + # Output Initialization if (!is.null(Y_init)) { if (methods::is(Y_init, "matrix")) { @@ -1077,14 +1370,18 @@ smallvis <- function(X, k = 2, scale = "absmax", else { Y_init <- match.arg(tolower(Y_init), c("rand", "pca", "spca", "laplacian", "normlaplacian")) - + if (!is_spectral_init(Y_init)) { # We now handle scaling coordinates below, so spca is treated like pca non_spectral_init <- Y_init if (non_spectral_init == "spca") { non_spectral_init <- "pca" } - Y <- init_out(non_spectral_init, X, n, k, pca_preprocessed = pca, + Y <- init_out(non_spectral_init, + X, + n, + k, + pca_preprocessed = pca, verbose = verbose) } else { @@ -1095,7 +1392,7 @@ smallvis <- function(X, k = 2, scale = "absmax", A <- cost_fn$P if (!is.null(A)) { tsmessage("Using P for spectral initialization") - } + } } else { tsmessage("Using V for spectral initialization") @@ -1126,7 +1423,8 @@ smallvis <- function(X, k = 2, scale = "absmax", } } } - if (!is.null(Y_init_sdev) || Y_init == "spca" || Y_init == "rand") { + if (!is.null(Y_init_sdev) || + Y_init == "spca" || Y_init == "rand") { if (is.null(Y_init_sdev)) { Y_init_sdev <- 1e-4 } @@ -1134,34 +1432,48 @@ smallvis <- function(X, k = 2, scale = "absmax", Y <- shrink_coords(Y, Y_init_sdev) } } - + cost <- NA itercosts <- c() if (iter0_cost && (verbose || ret_extra)) { cost_eval_res <- cost_eval(cost_fn, Y) cost_fn <- cost_eval_res$cost cost <- cost_eval_res$value - + tsmessage("Iteration #0 error: ", formatC(cost)) - + if (ret_extra) { names(cost) <- 0 itercosts <- c(itercosts, cost) } cost_fn <- cost_clear(cost_fn) } - + # Display initialization if (!is.null(epoch_callback)) { do_callback(epoch_callback, Y, 0, cost, cost_fn, opt) } if (max_iter < 1) { - return(ret_value(Y, ret_extra, method, X, scale, Y_init, iter = 0, - cost_fn = cost_fn, itercosts = itercosts, - start_time = start_time, optionals = ret_optionals, - pca = ifelse(pca && !whiten, initial_dims, 0), - whiten = ifelse(pca && whiten, initial_dims, 0), - use_cpp = use_cpp, n_threads = n_threads)) + return( + ret_value( + Y, + ret_extra, + method, + X, + scale, + Y_init, + iter = 0, + cost_fn = cost_fn, + itercosts = itercosts, + start_time = start_time, + optionals = ret_optionals, + pca = ifelse(pca && !whiten, initial_dims, 0), + whiten = ifelse(pca && + whiten, initial_dims, 0), + use_cpp 
= use_cpp, + n_threads = n_threads + ) + ) } opt_stages <- c() @@ -1182,18 +1494,23 @@ smallvis <- function(X, k = 2, scale = "absmax", cost_fn <- start_exaggerating(cost_fn, exaggeration_factor) if (!is.null(ee_mon_epoch)) { epoch <- ee_mon_epoch - # prevent tolerance based convergence if we monitor EE + # prevent tolerance based convergence if we monitor EE ee_mon_tol <- 0 tol <- ee_mon_tol stop_lying_iter <- max_iter - tsmessage("Applying early exaggeration factor = ", exaggeration_factor, - ", epoch every ", epoch, " iterations") + tsmessage( + "Applying early exaggeration factor = ", + exaggeration_factor, + ", epoch every ", + epoch, + " iterations" + ) } } else { stop_lying_iter <- 0 } - + old_cost <- NULL tolval <- NULL tsmessage("Optimizing coordinates") @@ -1206,7 +1523,7 @@ smallvis <- function(X, k = 2, scale = "absmax", opt <- opt_res$opt cost_fn <- opt_res$cost_fn Y <- opt_res$Y - + if (!is.null(cost_fn$P) && iter == stop_lying_iter && exaggeration_factor != 1) { tsmessage("Switching off exaggeration at iter ", iter) @@ -1215,60 +1532,79 @@ smallvis <- function(X, k = 2, scale = "absmax", epoch <- opt_epoch tol <- opt_tol } - + if (!is.null(cost_fn$P) && iter == start_late_lying_iter && late_exaggeration_factor != 1) { - tsmessage("Starting late exaggeration = ", - formatC(late_exaggeration_factor), " at iter ", iter, - " until iter ", max_iter) + tsmessage( + "Starting late exaggeration = ", + formatC(late_exaggeration_factor), + " at iter ", + iter, + " until iter ", + max_iter + ) cost_fn <- start_exaggerating(cost_fn, late_exaggeration_factor) opt_stage_idx <- opt_stage_idx + 1 } - + if (nnat(opt$is_terminated)) { - tsmessage("Iteration #", iter, - " stopping early: optimizer reports convergence: ", - opt$terminate$what) + tsmessage( + "Iteration #", + iter, + " stopping early: optimizer reports convergence: ", + opt$terminate$what + ) max_iter <- iter break } # Recenter after each iteration Y <- sweep(Y, 2, colMeans(Y)) - + if ((epoch > 0 && iter %% epoch == 0) || iter == max_iter) { stop_early <- FALSE cost_eval_res <- cost_eval(cost_fn, Y, opt_res) cost_fn <- cost_eval_res$cost cost <- cost_eval_res$value - + if (!is.null(old_cost)) { tolval <- reltol(cost, old_cost) / epoch } if (verbose) { - tsmessage("Iteration #", iter, " error: ", - formatC(cost) - , " ||G||2 = ", formatC(norm2(opt_res$G)) - , appendLF = FALSE - ) + tsmessage( + "Iteration #", + iter, + " error: ", + formatC(cost) + , + " ||G||2 = ", + formatC(norm2(opt_res$G)) + , + appendLF = FALSE + ) if (!is.null(tolval)) { message(" tol = ", formatC(tolval), appendLF = FALSE) } if (!is.null(opt$counts)) { - message(" nf = ", opt$counts$fn, " ng = ", opt$counts$gr, + message(" nf = ", + opt$counts$fn, + " ng = ", + opt$counts$gr, appendLF = FALSE) } - + # special treatment for mize innards if (!is.null(opt$stages$gradient_descent$step_size$value)) { opt$stages$gradient_descent$step_size$value - message(" alpha = ", - formatC(opt$stages$gradient_descent$step_size$value), - appendLF = FALSE) + message( + " alpha = ", + formatC(opt$stages$gradient_descent$step_size$value), + appendLF = FALSE + ) } if (!is.null(old_cost) && cost > old_cost) { @@ -1285,18 +1621,20 @@ smallvis <- function(X, k = 2, scale = "absmax", if (!is.null(epoch_callback)) { do_callback(epoch_callback, Y, iter, cost, cost_fn, opt) } - + if (ret_extra) { names(cost) <- iter itercosts <- c(itercosts, cost) } - + # Early stopping tests if (!is.null(ee_mon_epoch)) { cdiff <- old_cost - cost cdiffrc <- cdiff / old_cost - if (length(cdiffrc) > 0 && 
opt_stages[opt_stage_idx] == "early") { - if (iter > ee_mon_wait && length(cdiffrc) > 0 && length(old_cdiffrc) > 0 && + if (length(cdiffrc) > 0 && + opt_stages[opt_stage_idx] == "early") { + if (iter > ee_mon_wait && + length(cdiffrc) > 0 && length(old_cdiffrc) > 0 && cdiffrc < old_cdiffrc) { if (ee_mon_buffer < 1) { stop_early <- TRUE @@ -1314,23 +1652,28 @@ smallvis <- function(X, k = 2, scale = "absmax", tsmessage("Stopping early: cost fell below min_cost") } - if (!nnat(opt$is_terminated) && !is.null(tolval) && - tolval < tol && cost <= old_cost && - (iter > stop_lying_iter + tol_wait - || opt_stages[opt_stage_idx] != "opt")) { + if (!nnat(opt$is_terminated) && !is.null(tolval) && + tolval < tol && cost <= old_cost && + (iter > stop_lying_iter + tol_wait + || opt_stages[opt_stage_idx] != "opt")) { stop_early <- TRUE - tsmessage("Stopping early: relative tolerance (", formatC(tol), ") met") + tsmessage("Stopping early: relative tolerance (", + formatC(tol), + ") met") } - + # Alternative tolerance grad 2norm doesn't need cost to decrease to stop # (Use this for certain settings with e.g. LargeVis where numerical issues # can cause the cost function to increase almost negligibly) g2tolval <- (norm2(opt_res$G)) - if (!nnat(opt$is_terminated) && !is.null(g2tol) && g2tolval < g2tol && - (iter > stop_lying_iter + tol_wait + if (!nnat(opt$is_terminated) && + !is.null(g2tol) && g2tolval < g2tol && + (iter > stop_lying_iter + tol_wait || opt_stages[opt_stage_idx] != "opt")) { stop_early <- TRUE - tsmessage("Stopping early: ||G||2 tolerance (", formatC(g2tol), ") met") + tsmessage("Stopping early: ||G||2 tolerance (", + formatC(g2tol), + ") met") } # Stop current stage early if we aren't making progress @@ -1349,27 +1692,28 @@ smallvis <- function(X, k = 2, scale = "absmax", mom_switch_iter <- stop_lying_iter + n_low_mom_iters opt$mom_switch_iter <- mom_switch_iter } - + # we know there is at least one more stage or we would have hit the # break earlier next_opt_stage <- opt_stages[opt_stage_idx + 1] - switch(next_opt_stage, - opt = tsmessage("Proceeding to main optimization stage"), - late = { - n_late_exagg_iters <- max_iter - start_late_lying_iter - start_late_lying_iter <- iter + 1 - max_iter <- start_late_lying_iter + n_late_exagg_iters - tsmessage("Proceeding to late exaggeration stage") - }, - stop("BUG: unknown optimization stage '", next_opt_stage, "'") + switch( + next_opt_stage, + opt = tsmessage("Proceeding to main optimization stage"), + late = { + n_late_exagg_iters <- max_iter - start_late_lying_iter + start_late_lying_iter <- iter + 1 + max_iter <- start_late_lying_iter + n_late_exagg_iters + tsmessage("Proceeding to late exaggeration stage") + }, + stop("BUG: unknown optimization stage '", next_opt_stage, "'") ) } if (nnat(opt$is_terminated)) { break } - + old_cost <- cost # Any special custom epoch stuff @@ -1378,7 +1722,7 @@ smallvis <- function(X, k = 2, scale = "absmax", cost_fn <- epoch_res$cost } } - + if (opt_stages[opt_stage_idx] == "early") { cost_fn <- stop_exaggerating(cost_fn, exaggeration_factor) } @@ -1388,413 +1732,33 @@ smallvis <- function(X, k = 2, scale = "absmax", # Recenter before output Y <- sweep(Y, 2, colMeans(Y)) - res <- ret_value(Y, ret_extra, method, X, scale, Y_init, iter, start_time, - cost_fn = cost_fn, opt_res$G, - perplexity, itercosts, - stop_lying_iter, start_late_lying_iter, opt_list, opt, - exaggeration_factor, late_exaggeration_factor, - optionals = ret_optionals, - pca = ifelse(pca && !whiten, initial_dims, 0), - whiten = ifelse(pca && 
whiten, initial_dims, 0), - use_cpp = use_cpp, n_threads = n_threads) - - res -} - -#' Best t-SNE Result From Multiple Initializations -#' -#' Run t-SNE multiple times from a random initialization, and return the -#' embedding with the lowest cost. -#' -#' This function ignores any value of \code{Y_init} you set, and uses -#' \code{Y_init = "rand"}. -#' -#' @param nrep Number of repeats. -#' @param keep_all If \code{TRUE}, then the return value is a list of lists, -#' indexed from 1 .. \code{nrep}, with each entry the result from each -#' \code{\link{smallvis}} run. Otherwise just the result with the lowest error -#' is returned. -#' @param ... Arguments to apply to each \code{\link{smallvis}} run. -#' @return The \code{\link{smallvis}} result with the lowest final cost, or -#' if \code{keep_all} is \code{TRUE} all results as a list, indexed as 1 .. -#' \code{nrep}. If \code{ret_extra} is not \code{FALSE}, then the final costs for all -#' \code{nrep} runs are also included in the return value list as a vector -#' called \code{all_costs}. In this case, if \code{keep_all} is \code{TRUE}, then -#' \code{all_costs} appears as an extra item on all results. Additionally, -#' each result will have an extra entry \code{best_rep}, giving the index of the -#' result with the lowest cost. -#' @examples -#' \dontrun{ -#' # Return best result out of five random initializations -#' tsne_iris_best <- smallvis_rep(nrep = 5, X = iris, perplexity = 50, method = "tsne", -#' ret_extra = TRUE) -#' # How much do the costs vary between runs? -#' range(tsne_iris_best$all_costs) -#' # Display best embedding found -#' plot(tsne_iris_best$Y) -#' -#' # Keep all results -#' # First result is in tsne_iris_rep[[1]], second in tsne_iris_rep[[2]] etc. -#' tsne_iris_rep <- smallvis_rep(nrep = 5, X = iris, perplexity = 50, method = "tsne", -#' ret_extra = TRUE, keep_all = TRUE) -#' # Index of result with smallest error is in special list item 'best_rep' -#' best_iris <- tsne_iris_rep[[tsne_iris_rep[[1]]$best_rep]] -#' -#' } -#' @export -smallvis_rep <- function(nrep = 10, keep_all = FALSE, ...) { - if (nrep < 1) { - stop("nrep must be 1 or greater") - } - varargs <- list(...) 
- best_res <- NULL - best_cost <- Inf - all_costs <- c() - # Keep requested return type for final result - ret_extra <- varargs$ret_extra - - # always return extra so we can find the cost - if (!should_ret_extra(ret_extra)) { - varargs$ret_extra <- TRUE - } - - varargs$Y_init <- "rand" - ret <- list() - - for (i in 1:nrep) { - # If verbose is not explicitly set to FALSE, it's TRUE by default - if (nnat(varargs$verbose) || is.null(varargs$verbose)) { - tsmessage("Starting embedding # ", i, " of ", nrep) - } - res <- do.call(smallvis, varargs) - - if (keep_all) { - ret[[i]] <- res - } - - final_cost <- res$itercosts[length(res$itercosts)] - names(final_cost) <- NULL - all_costs <- c(all_costs, final_cost) - - if (!keep_all) { - if (final_cost < best_cost) { - best_cost <- final_cost - best_res <- res - } - } - } - - if (keep_all) { - # if keep_all is TRUE and we asked for extra return info - # also add the final costs and the index of the best result to each result - if (should_ret_extra(ret_extra)) { - best_rep <- which.min(all_costs) - for (i in 1:nrep) { - ret[[i]]$all_costs <- all_costs - ret[[i]]$best_rep <- best_rep - } - } - # otherwise just store the Y-coordinates of each result - else { - for (i in 1:nrep) { - ret[[i]] <- ret[[i]]$Y - } - } - } - else { - # Only keeping one result - if (should_ret_extra(ret_extra)) { - # store info about other results on best result list - best_res$all_costs <- all_costs - ret <- best_res - } - else { - ret <- best_res$Y - } - } - ret -} - -# If ret_extra is NULL or FALSE, we aren't returning extra info -# If ret_extra is TRUE or a vector, we are returning extra info -should_ret_extra <- function(ret_extra) { - !is.null(ret_extra) && ((methods::is(ret_extra, "logical") && ret_extra) || - methods::is(ret_extra, "character")) -} - -#' Dimensionality Reduction With Perplexity Stepping -#' -#' Carry out dimensionality reduction of a (small) dataset using one of a -#' variety of neighbor embedding methods, using a decreasing value of -#' perplexity to avoid bad local minima. -#' -#' This function uses ideas similar to those in the NeRV (Venna et al., 2010) -#' and JSE (Lee et al., 2013), where to avoid local minima, the initial -#' optimization steps use affinities with larger bandwidths (NeRV) or larger -#' perplexity values (JSE). This implementation uses a series of decreasing -#' perplexity values, as in JSE. -#' -#' For details on the arguments that can be passed to the dimensionality -#' reduction routine, see the help text for \code{\link{smallvis}}. -#' -#' To avoid spending too much extra time in perplexity calibrations, the extra -#' perplexities start at the power of 2 closest to, but not greater than, -#' half the dataset size (in terms of number of objects). Further calibrations -#' are then carried out halving the perplexity each time, until the perplexity -#' specified by the user is reached. -#' -#' The number of iterations spent in the larger perplexity values is specified -#' by the \code{step_iter} parameter. This determines the total number -#' of iterations, e.g. if \code{step_iter = 250} and extra optimizations -#' at a perplexity of 1024, 512, 256, 128 and 64 will be carried out, these will -#' run for 50 iterations each. To keep the number of iterations equivalent to -#' that used by a single run of \code{\link{smallvis}}, the value of -#' \code{step_iter} is subtracted from the value of \code{max_iter} before -#' the optimization at the target perplexity is carried out, e.g. 
if -#' \code{max_iter = 1000} and \code{step_iter = 250}, the final -#' optimization will run for 750 iterations only. -#' -#' Any value of \code{tol}, \code{exaggeration_factor} and -#' \code{stop_lying_iter} provided is used only with the final optimization. -#' -#' @param step_iter Number of iterations to carry out the perplexity -#' stepping. Must be < the value of \code{max_iter}. -#' @param ... Arguments to be passed to \code{\link{smallvis}}. See 'Details' -#' for information on which arguments may be modified or ignored during certain -#' parts of the embedding. -#' @return The result of the final run of \code{\link{smallvis}} at the target -#' perplexity. -#' @examples -#' \dontrun{ -#' # t-SNE on the iris with L-BFGS optimization -#' # The 1000 max_iter is split between 250 iterations at perplexity = 64 -#' # and then 750 iterations at perplexity = 40. -#' iris_lbfgs_pstep <- smallvis_perpscale( -#' step_iter = 250, X = iris, scale = FALSE, verbose = TRUE, Y_init = "spca", -#' ret_extra = c("DX", "DY"), perplexity = 40, max_iter = 1000, opt = list("l-bfgs")) -#' } -#' @export -#' @references -#' Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010). -#' Information retrieval perspective to nonlinear dimensionality reduction for -#' data visualization. -#' \emph{Journal of Machine Learning Research}, \emph{11}, 451-490. -#' -#' Lee, J. A., Renard, E., Bernard, G., Dupont, P., & Verleysen, M. (2013). -#' Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions in -#' dimensionality reduction based on similarity preservation. -#' \emph{Neurocomputing}, \emph{112}, 92-108. -smallvis_perpstep <- function(step_iter = 250, ...) { - varargs <- list(...) - max_iter <- varargs$max_iter - if (is.null(max_iter)) { - max_iter <- 1000 - } - if (max_iter <= step_iter) { - stop("max_iter must be > step_iter") - } - - target_perplexity <- varargs$perplexity - if (is.null(target_perplexity)) { - target_perplexity <- 30 - } - - X <- varargs$X - if (methods::is(X, "dist")) { - n <- attr(X, "Size") - } - else { - n <- nrow(X) - } - perps <- scale_perps(n = n, target_perp = target_perplexity) - - nperps <- length(perps) - if (nperps > 0) { - max_iter_step <- max(1, floor(step_iter / nperps)) - max_iter_target <- max(1, max_iter - step_iter) - - # Save/Modify some options between step iterations and final optimization - ret_extra <- varargs$ret_extra - varargs$ret_extra <- FALSE - varargs$max_iter <- max_iter_step - tol <- varargs$tol - varargs$tol <- 0 - exaggeration_factor <- varargs$exaggeration_factor - varargs$exaggeration_factor <- 1 - - epoch <- varargs$epoch - varargs$epoch <- max_iter_step - - # Loop over initial perplexities - res <- NULL - for (i in 1:nperps) { - if (nnat(varargs$verbose)) { - tsmessage("Optimizing at step perplexity ", formatC(perps[i]), - " for ", max_iter_step, " iterations") - } - varargs$perplexity <- perps[i] - if (i > 1) { - varargs$Y_init <- res - } - res <- do.call(smallvis, varargs) - } - - varargs$Y_init <- res - varargs$max_iter <- max_iter_target - # Put the old arguments back before final optimization - varargs$perplexity <- target_perplexity - if (!is.null(ret_extra)) { - varargs$ret_extra <- ret_extra - } - if (!is.null(tol)) { - varargs$tol <- tol - } - if (!is.null(epoch)) { - varargs$epoch <- epoch - } - if (!is.null(exaggeration_factor)) { - varargs$exaggeration_factor <- exaggeration_factor - } - } - - if (nnat(varargs$verbose)) { - tsmessage("Optimizing at target perplexity ", formatC(target_perplexity), - " for ", 
max_iter_target, " iterations") - } - do.call(smallvis, varargs) -} - -# Utility function for perplexity step -scale_perps <- function(n, target_perp) { - max_perp <- n / 2 - max_perp <- 2 ^ floor(log(max_perp, 2)) - perp <- max_perp - - if (max_perp > target_perp) { - perps <- c() - while (perp > target_perp) { - perps <- c(perps, perp) - perp <- perp / 2 - } - } - - perps -} - - -# Input Preprocess -------------------------------------------------------- - -# Scale X according to various strategies -scale_input <- function(X, scale, verbose = FALSE) { - if (is.null(scale)) { - scale <- "none" - } - if (is.logical(scale)) { - if (scale) { - scale <- "scale" - } - else { - scale <- "none" - } - } - scale <- match.arg(tolower(scale), c("none", "scale", "range", "absmax")) - - switch(scale, - range = { - tsmessage("Range scaling X") - X <- as.matrix(X) - X <- X - min(X) - X <- X / max(X) - }, - absmax = { - tsmessage("Normalizing by abs-max") - X <- base::scale(X, scale = FALSE) - X <- X / abs(max(X)) - }, - scale = { - tsmessage("Scaling to zero mean and unit variance") - X <- Filter(stats::var, X) - tsmessage("Kept ", ncol(X), " non-zero-variance columns") - X <- base::scale(X, scale = TRUE) - }, - none = { - X <- as.matrix(X) - } + res <- ret_value( + Y, + ret_extra, + method, + X, + scale, + Y_init, + iter, + start_time, + cost_fn = cost_fn, + opt_res$G, + perplexity, + itercosts, + stop_lying_iter, + start_late_lying_iter, + opt_list, + opt, + exaggeration_factor, + late_exaggeration_factor, + optionals = ret_optionals, + pca = ifelse(pca && !whiten, initial_dims, 0), + whiten = ifelse(pca && whiten, initial_dims, 0), + use_cpp = use_cpp, + n_threads = n_threads ) - X -} - -# Reduce input dimensionality via PCA and also optionally whiten data -pca_preprocess <- function(X, pca, whiten, initial_dims, verbose = FALSE) { - # We won't do PCA if the rank of the input is less than the requested - # initial dimensionality - if (pca) { - pca <- min(nrow(X), ncol(X)) >= initial_dims - } - if (pca) { - if (whiten) { - tsmessage("Reducing initial dimensionality with PCA and ", - "whitening to ", initial_dims, " dims") - X <- pca_whiten(X = X, ncol = initial_dims, verbose = verbose) - } - else { - tsmessage("Reducing initial dimensionality with PCA to ", - initial_dims, " dims") - X <- pca_scores(X = X, ncol = initial_dims, verbose = verbose) - } - } - X -} - - - - -# Epoch Functions --------------------------------------------------------- - -do_epoch <- function(opt, cost, iter, Y, fn_val) { - if (!is.null(cost$epoch)) { - res <- cost$epoch(opt, cost, iter, Y, fn_val) - if (!is.null(res$opt)) { - opt <- res$opt - } - if (!is.null(res$cost)) { - cost <- res$cost - } - } - - list( - opt = opt, - cost = cost - ) -} - -# Helper function for epoch callback, allowing user to supply callbacks with -# multiple arities. 
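# Illustrative sketch (not part of the patch): do_callback() below dispatches
# on the callback's arity, so for example a three-argument callback
#
#   cb <- function(Y, iter, cost) {
#     message("iter ", iter, " cost: ", formatC(cost))
#   }
#   smallvis(X, perplexity = 30, epoch_callback = cb)
#
# is invoked as cb(Y, iter, cost) at each epoch; a one-argument function(Y)
# or the full function(Y, iter, cost, cost_fn, opt) form is matched the same
# way.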
-do_callback <- function(cb, Y, iter, cost = NULL, cost_fn = NULL, opt = NULL) { - nfs <- length(formals(cb)) - switch(nfs, - "1" = cb(Y), - "2" = cb(Y, iter), - "3" = cb(Y, iter, cost), - "4" = cb(Y, iter, cost, cost_fn), - "5" = cb(Y, iter, cost, cost_fn, opt) - ) -} - -# Create a callback for visualization -make_smallvis_cb <- function(df) { - force(df) - palette <- NULL - function(Y, iter, cost = NULL) { - if (is.null(palette)) { - palette <- vizier:::color_helper(df, color_scheme = grDevices::rainbow)$palette - } - title <- paste0("iter: ", iter) - if (!(is.null(cost) || is.na(cost))) { - title <- paste0(title, " cost = ", formatC(cost)) - } - vizier::embed_plot(Y, df, title = title, color_scheme = palette) - } + + res } # Result Export ----------------------------------------------------------- @@ -1805,19 +1769,33 @@ make_smallvis_cb <- function(df) { # If ret_extra is TRUE and iter > 0, then all the NULL-default parameters are # expected to be present. If iter == 0 then the return list will contain only # scaling and initialization information. -ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = NULL, +ret_value <- function(Y, + ret_extra, + method, + X, + scale, + Y_init, + iter, + start_time = NULL, cost_fn = NULL, G = NULL, - perplexity = NULL, pca = 0, whiten = 0, + perplexity = NULL, + pca = 0, + whiten = 0, itercosts = NULL, - stop_lying_iter = NULL, start_late_lying_iter = NULL, - opt_input = NULL, opt_res = NULL, - exaggeration_factor = 1, late_exaggeration_factor = 1, - optionals = c(), use_cpp = FALSE, n_threads = 1) { + stop_lying_iter = NULL, + start_late_lying_iter = NULL, + opt_input = NULL, + opt_res = NULL, + exaggeration_factor = 1, + late_exaggeration_factor = 1, + optionals = c(), + use_cpp = FALSE, + n_threads = 1) { attr(Y, "dimnames") <- NULL if (ret_extra) { end_time <- Sys.time() - + if (methods::is(X, "dist")) { N <- attr(X, "Size") origD <- NULL @@ -1826,7 +1804,7 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = N <- nrow(X) origD <- ncol(X) } - + res <- list( Y = Y, N = N, @@ -1837,24 +1815,24 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = iter = iter, time_secs = as.numeric(end_time - start_time, units = "secs") ) - + if (!is.null(G)) { res$G2norm <- norm2(G) } - + if (pca > 0) { res$pca_dims <- pca } else if (whiten > 0) { res$whiten_dims <- whiten } - + if (is.null(cost_fn$pcost)) { cost_fn <- cost_grad(cost_fn, Y) cost_fn <- cost_point(cost_fn, Y) } res$costs <- cost_fn$pcost - + if (!is.null(opt_input)) { res$opt <- opt_input if (!is.null(opt_res$counts)) { @@ -1862,14 +1840,14 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = } } - + # Don't report exaggeration settings if they didn't do anything if (exaggeration_factor == 1 || late_exaggeration_factor == 1) { if (exaggeration_factor == 1) { exaggeration_factor <- NULL stop_lying_iter <- NULL } - + if (late_exaggeration_factor == 1) { late_exaggeration_factor <- NULL start_late_lying_iter <- NULL @@ -1882,22 +1860,25 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = late_exaggeration_factor <- NULL } } - - res <- c(res, list( - perplexity = perplexity, - itercosts = itercosts, - exaggeration_factor = exaggeration_factor, - stop_lying_iter = stop_lying_iter, - late_exaggeration_factor = late_exaggeration_factor, - start_late_lying_iter = start_late_lying_iter - )) - + + res <- c( + res, + list( + perplexity = perplexity, + itercosts 
= itercosts, + exaggeration_factor = exaggeration_factor, + stop_lying_iter = stop_lying_iter, + late_exaggeration_factor = late_exaggeration_factor, + start_late_lying_iter = start_late_lying_iter + ) + ) + # If using the Intrinsic Dimensionality method, use the chosen perplexity if (!is.null(cost_fn$idp)) { res$perplexity <- cost_fn$idp } - - + + optionals <- tolower(unique(optionals)) for (o in optionals) { exported <- NULL @@ -1925,12 +1906,12 @@ ret_value <- function(Y, ret_extra, method, X, scale, Y_init, iter, start_time = else if (o == "dy") { res$DY <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) } - + if (o == "x") { res$X <- X } } - + res <- remove_nulls(res) res } From c78fa66dc6d99138e07d28c2e3bf3c0c8cf62cde Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 12:51:41 -0700 Subject: [PATCH 20/26] reformatting --- smallvis/R/smallvis.R | 1591 ++++++++++++++++++++--------------------- 1 file changed, 791 insertions(+), 800 deletions(-) diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 241e7a0..0c5b12e 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -876,579 +876,203 @@ smallvis <- function(X, if (is.logical(epoch_callback)) { if (epoch_callback) { epoch_callback <- make_smallvis_cb(X) - } - else { + } else { epoch_callback <- NULL } - } - else if (is.function(epoch_callback)) { + } else if (is.function(epoch_callback)) { force(epoch_callback) } - + # The embedding method - method_names <- c( - "tsne", - "largevis", - "umap", - "tumap", - "ntumap", - "mmds", - "gmmds", - "asne", - "ssne", - "wtsne", - "wssne", - "hssne", - "ee", - "nerv", - "snerv", - "jse", - "sjse", - "smmds", - "sammon", - "tasne", - "trmsne", - "trsrsne", - "tmsne", - "arsrsne", - "rsrjse", - "rsrnerv", - "btsne", - "bssne", - "basne", - "btasne", - "bnerv", - "ballmmds", - "knnmmds", - "dhssne", - "pstsne", - "tsneu", - "skdtsne", - "usne", - "cetsne", - "tee", - "absne", - "chsne", - "hlsne", - "rklsne", - "jssne", - "gsne", - "abssne", - "bhssne" + cost_fn <- create_cost(method, perplexity, eps, n_threads, use_cpp) + + if (exaggeration_factor != 1) { + if (stop_lying_iter < 1) { + stop("stop_lying_iter must be >= 1") + } + } + + if (late_exaggeration_factor != 1) { + if (start_late_lying_iter < 1) { + stop("start_late_lying_iter must be >= 1") + } + } + + if (exaggeration_factor != 1 && late_exaggeration_factor != 1) { + if (start_late_lying_iter < stop_lying_iter) { + stop("start_late_lying_iter must be >= stop_lying_iter") + } + } + + if (methods::is(pca, "character") && pca == "whiten") { + pca <- TRUE + whiten <- TRUE + } else { + whiten <- FALSE + } + if (pca && initial_dims < k) { + stop( + "Initial PCA dimensionality must be larger than desired output ", + "dimension" + ) + } + + start_time <- NULL + ret_optionals <- c() + if (methods::is(ret_extra, "character")) { + ret_optionals <- ret_extra + ret_extra <- TRUE + } + + if (ret_extra) { + start_time <- Sys.time() + } + + if (methods::is(X, "dist")) { + n <- attr(X, "Size") + } else { + if (methods::is(X, "data.frame")) { + indexes <- which(vapply(X, is.numeric, logical(1))) + tsmessage("Found ", length(indexes), " numeric columns") + if (length(indexes) == 0) { + stop("No numeric columns found") + } + X <- X[, indexes] + } + + X <- scale_input(X, scale, verbose = verbose) + X <- pca_preprocess(X, pca, whiten, initial_dims, verbose = verbose) + n <- nrow(X) + } + + # Check for NA + if (any(is.na(X))) { + stop("Input data contains NA: missing data is not allowed") + } + + # Fail early as 
possible if matrix initializer is invalid + if (methods::is(Y_init, "matrix")) { + if (nrow(Y_init) != n || ncol(Y_init) != k) { + stop("Y_init matrix does not match necessary configuration for X") + } + } + + # Optimizer + if (opt[[1]] == "dbd" || opt[[1]] == "ndbd") { + if (eta == "optsne") { + eta <- n / exaggeration_factor + tsmessage("Using opt-SNE learning rate = ", formatC(eta)) + } + opt_list <- lmerge( + opt, + list( + momentum = momentum, + final_momentum = final_momentum, + mom_switch_iter = mom_switch_iter, + eta = eta, + min_gain = min_gain, + verbose = verbose + ) + ) + } else { + opt_list <- opt + } + opt <- opt_create(opt_list, verbose = verbose) + + # Initialize the cost function and create P + cost_fn <- cost_init( + cost_fn, + X, + max_iter = max_iter, + verbose = verbose, + ret_extra = ret_optionals ) - if (is.character(method)) { - method <- match.arg(tolower(method), method_names) - cost_fn <- switch( - method, - tsne = tsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - umap = umap( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - largevis = largevis( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - tumap = tumap( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - ntumap = ntumap( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - mmds = mmds( - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - gmmds = gmmds( - k = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - asne = asne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - ssne = ssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - wtsne = wtsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - wssne = wssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - hssne = hssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - ee = ee( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - nerv = nerv( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - snerv = snerv( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - jse = jse( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - sjse = sjse( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - smmds = smmds( - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - sammon = sammon( - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - tasne = tasne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - trmsne = trmsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - trsrsne = trsrsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - tmsne = tmsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - arsrsne = arsrsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - rsrjse = rsrjse( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - rsrnerv = rsrnerv( - 
perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - btsne = btsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - bssne = bssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - basne = basne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - btasne = btasne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - bnerv = bnerv( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - ballmmds = ballmmds( - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - knnmmds = knnmmds( - k = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - dhssne = dhssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - tsneu = tsneu( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - pstsne = pstsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - skdtsne = skdtsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - usne = usne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - cetsne = cetsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - tee = tee( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - absne = absne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - chsne = chsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - hlsne = hlsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - gsne = gsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - rklsne = rklsne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - jssne = jssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - abssne = abssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - bhssne = bhssne( - perplexity = perplexity, - n_threads = n_threads, - eps = eps, - use_cpp = use_cpp - ), - stop("BUG: someone forgot to implement option: '", method, "'") - ) - } - else { - if (is.list(method)) { - methodlist <- method - method_name <- methodlist[[1]] - methodlist[[1]] <- NULL - method <- match.arg(tolower(method_name), method_names) - if (exists(method_name)) { - fn <- get(method_name) - } - else { - stop("Unknown method: '", method_name, "'") + + # Output Initialization + if (!is.null(Y_init)) { + if (methods::is(Y_init, "matrix")) { + Y <- Y_init + Y_init <- "matrix" + } else { + Y_init <- match.arg( + tolower(Y_init), + c("rand", "pca", "spca", "laplacian", "normlaplacian") + ) + + if (!is_spectral_init(Y_init)) { + # We now handle scaling coordinates below, so spca is treated like pca + non_spectral_init <- Y_init + if (non_spectral_init == "spca") { + non_spectral_init <- "pca" + } + Y <- init_out(non_spectral_init, + X, + n, + k, + pca_preprocessed = pca, + verbose = verbose + ) + } else { + # Use normalized or unnormalized input weight for spectral initialization + # Pretty sure it doesn't matter, given the current options + A <- cost_fn$V + if (is.null(A)) { + A <- cost_fn$P + if 
(!is.null(A)) { + tsmessage("Using P for spectral initialization") + } + } else { + tsmessage("Using V for spectral initialization") + } + + if (is.null(A)) { + if (!is.null(cost_fn$knn)) { + tsmessage("Using knn 1 / 1 + R for spectral initialization") + A <- 1 / (1 + cost_fn$R) + A[cost_fn$knn == 0] <- 0 + } else if (!is.null(cost_fn$R)) { + tsmessage("Using 1/ (1 + R) for spectral initialization") + A <- 1 / (1 + cost_fn$R) + } else { + stop("No suitable input for spectral initialization") + } + } + + if (Y_init == "laplacian") { + tsmessage("Initializing from Laplacian Eigenmap") + Y <- laplacian_eigenmap(A, ndim = k) + } else { + tsmessage("Initializing from normalized Laplacian") + Y <- normalized_spectral_init(A, ndim = k) + } } } - else if (is.function(method)) { - fn <- method - methodlist <- list() - } - else { - stop("Bad method parameter type") - } - - param_names <- names(formals(fn)) - if ("perplexity" %in% param_names) { - methodlist$perplexity <- perplexity - } - if ("k" %in% param_names) { - methodlist$k <- perplexity - } - cost_fn <- do.call(fn, methodlist) - } - - if (exaggeration_factor != 1) { - if (stop_lying_iter < 1) { - stop("stop_lying_iter must be >= 1") + if (!is.null(Y_init_sdev) || + Y_init == "spca" || Y_init == "rand") { + if (is.null(Y_init_sdev)) { + Y_init_sdev <- 1e-4 + } + tsmessage("Scaling initial coords to sdev = ", formatC(Y_init_sdev)) + Y <- shrink_coords(Y, Y_init_sdev) } } - - if (late_exaggeration_factor != 1) { - if (start_late_lying_iter < 1) { - stop("start_late_lying_iter must be >= 1") - } - } - - if (exaggeration_factor != 1 && late_exaggeration_factor != 1) { - if (start_late_lying_iter < stop_lying_iter) { - stop("start_late_lying_iter must be >= stop_lying_iter") - } - } - - if (methods::is(pca, "character") && pca == "whiten") { - pca <- TRUE - whiten <- TRUE - } - else { - whiten <- FALSE - } - if (pca && initial_dims < k) { - stop("Initial PCA dimensionality must be larger than desired output ", - "dimension") - } - - start_time <- NULL - ret_optionals <- c() - if (methods::is(ret_extra, "character")) { - ret_optionals <- ret_extra - ret_extra <- TRUE - } - - if (ret_extra) { - start_time <- Sys.time() - } - - if (methods::is(X, "dist")) { - n <- attr(X, "Size") - } - else { - if (methods::is(X, "data.frame")) { - indexes <- which(vapply(X, is.numeric, logical(1))) - tsmessage("Found ", length(indexes), " numeric columns") - if (length(indexes) == 0) { - stop("No numeric columns found") - } - X <- X[, indexes] - } - - X <- scale_input(X, scale, verbose = verbose) - X <- pca_preprocess(X, pca, whiten, initial_dims, verbose = verbose) - n <- nrow(X) - } - - # Check for NA - if (any(is.na(X))) { - stop("Input data contains NA: missing data is not allowed") - } - - # Fail early as possible if matrix initializer is invalid - if (methods::is(Y_init, "matrix")) { - if (nrow(Y_init) != n || ncol(Y_init) != k) { - stop("Y_init matrix does not match necessary configuration for X") - } - } - - # Optimizer - if (opt[[1]] == "dbd" || opt[[1]] == "ndbd") { - if (eta == "optsne") { - eta <- n / exaggeration_factor - tsmessage("Using opt-SNE learning rate = ", formatC(eta)) - } - opt_list <- lmerge( - opt, - list( - momentum = momentum, - final_momentum = final_momentum, - mom_switch_iter = mom_switch_iter, - eta = eta, - min_gain = min_gain, - verbose = verbose - ) - ) - } - else { - opt_list <- opt - } - opt <- opt_create(opt_list, verbose = verbose) - - # Initialize the cost function and create P - cost_fn <- cost_init( - cost_fn, - X, - max_iter 
= max_iter, - verbose = verbose, - ret_extra = ret_optionals - ) - - # Output Initialization - if (!is.null(Y_init)) { - if (methods::is(Y_init, "matrix")) { - Y <- Y_init - Y_init <- "matrix" - } - else { - Y_init <- match.arg(tolower(Y_init), - c("rand", "pca", "spca", "laplacian", "normlaplacian")) - - if (!is_spectral_init(Y_init)) { - # We now handle scaling coordinates below, so spca is treated like pca - non_spectral_init <- Y_init - if (non_spectral_init == "spca") { - non_spectral_init <- "pca" - } - Y <- init_out(non_spectral_init, - X, - n, - k, - pca_preprocessed = pca, - verbose = verbose) - } - else { - # Use normalized or unnormalized input weight for spectral initialization - # Pretty sure it doesn't matter, given the current options - A <- cost_fn$V - if (is.null(A)) { - A <- cost_fn$P - if (!is.null(A)) { - tsmessage("Using P for spectral initialization") - } - } - else { - tsmessage("Using V for spectral initialization") - } - - if (is.null(A)) { - if (!is.null(cost_fn$knn)) { - tsmessage("Using knn 1 / 1 + R for spectral initialization") - A <- 1 / (1 + cost_fn$R) - A[cost_fn$knn == 0] <- 0 - } - else if (!is.null(cost_fn$R)) { - tsmessage("Using 1/ (1 + R) for spectral initialization") - A <- 1 / (1 + cost_fn$R) - } - else { - stop("No suitable input for spectral initialization") - } - } - - if (Y_init == "laplacian") { - tsmessage("Initializing from Laplacian Eigenmap") - Y <- laplacian_eigenmap(A, ndim = k) - } - else { - tsmessage("Initializing from normalized Laplacian") - Y <- normalized_spectral_init(A, ndim = k) - } - } - } - if (!is.null(Y_init_sdev) || - Y_init == "spca" || Y_init == "rand") { - if (is.null(Y_init_sdev)) { - Y_init_sdev <- 1e-4 - } - tsmessage("Scaling initial coords to sdev = ", formatC(Y_init_sdev)) - Y <- shrink_coords(Y, Y_init_sdev) - } - } - + cost <- NA itercosts <- c() if (iter0_cost && (verbose || ret_extra)) { cost_eval_res <- cost_eval(cost_fn, Y) cost_fn <- cost_eval_res$cost cost <- cost_eval_res$value - + tsmessage("Iteration #0 error: ", formatC(cost)) - + if (ret_extra) { names(cost) <- 0 itercosts <- c(itercosts, cost) } cost_fn <- cost_clear(cost_fn) } - + # Display initialization if (!is.null(epoch_callback)) { do_callback(epoch_callback, Y, 0, cost, cost_fn, opt) @@ -1469,13 +1093,13 @@ smallvis <- function(X, optionals = ret_optionals, pca = ifelse(pca && !whiten, initial_dims, 0), whiten = ifelse(pca && - whiten, initial_dims, 0), + whiten, initial_dims, 0), use_cpp = use_cpp, n_threads = n_threads ) ) } - + opt_stages <- c() if (exaggeration_factor != 1) { opt_stages <- c(opt_stages, "early") @@ -1485,7 +1109,7 @@ smallvis <- function(X, opt_stages <- c(opt_stages, "late") } opt_stage_idx <- 1 - + opt_epoch <- epoch opt_tol <- tol old_cdiffrc <- NULL @@ -1506,15 +1130,14 @@ smallvis <- function(X, " iterations" ) } - } - else { + } else { stop_lying_iter <- 0 } - + old_cost <- NULL tolval <- NULL tsmessage("Optimizing coordinates") - + # Use a while loop, so we can change max_iter inside the loop iter <- 0 while (iter < max_iter) { @@ -1523,18 +1146,18 @@ smallvis <- function(X, opt <- opt_res$opt cost_fn <- opt_res$cost_fn Y <- opt_res$Y - + if (!is.null(cost_fn$P) && iter == stop_lying_iter && - exaggeration_factor != 1) { + exaggeration_factor != 1) { tsmessage("Switching off exaggeration at iter ", iter) cost_fn <- stop_exaggerating(cost_fn, exaggeration_factor) opt_stage_idx <- opt_stage_idx + 1 epoch <- opt_epoch tol <- opt_tol } - + if (!is.null(cost_fn$P) && iter == start_late_lying_iter && - 
late_exaggeration_factor != 1) { + late_exaggeration_factor != 1) { tsmessage( "Starting late exaggeration = ", formatC(late_exaggeration_factor), @@ -1544,223 +1167,598 @@ smallvis <- function(X, max_iter ) cost_fn <- start_exaggerating(cost_fn, late_exaggeration_factor) - + opt_stage_idx <- opt_stage_idx + 1 } - - if (nnat(opt$is_terminated)) { - tsmessage( - "Iteration #", - iter, - " stopping early: optimizer reports convergence: ", - opt$terminate$what - ) - max_iter <- iter - break + + if (nnat(opt$is_terminated)) { + tsmessage( + "Iteration #", + iter, + " stopping early: optimizer reports convergence: ", + opt$terminate$what + ) + max_iter <- iter + break + } + + # Recenter after each iteration + Y <- sweep(Y, 2, colMeans(Y)) + + if ((epoch > 0 && iter %% epoch == 0) || iter == max_iter) { + stop_early <- FALSE + + cost_eval_res <- cost_eval(cost_fn, Y, opt_res) + cost_fn <- cost_eval_res$cost + cost <- cost_eval_res$value + + if (!is.null(old_cost)) { + tolval <- reltol(cost, old_cost) / epoch + } + + if (verbose) { + tsmessage( + "Iteration #", + iter, + " error: ", + formatC(cost), + " ||G||2 = ", + formatC(norm2(opt_res$G)), + appendLF = FALSE + ) + if (!is.null(tolval)) { + message(" tol = ", formatC(tolval), appendLF = FALSE) + } + + if (!is.null(opt$counts)) { + message(" nf = ", + opt$counts$fn, + " ng = ", + opt$counts$gr, + appendLF = FALSE + ) + } + + # special treatment for mize innards + if (!is.null(opt$stages$gradient_descent$step_size$value)) { + opt$stages$gradient_descent$step_size$value + message( + " alpha = ", + formatC(opt$stages$gradient_descent$step_size$value), + appendLF = FALSE + ) + } + + if (!is.null(old_cost) && cost > old_cost) { + message(" !", appendLF = FALSE) + } + + if (!is.null(opt$epoch)) { + opt <- opt$epoch(Y, iter, cost, cost_fn, opt) + } + + message() + utils::flush.console() + } + if (!is.null(epoch_callback)) { + do_callback(epoch_callback, Y, iter, cost, cost_fn, opt) + } + + if (ret_extra) { + names(cost) <- iter + itercosts <- c(itercosts, cost) + } + + # Early stopping tests + if (!is.null(ee_mon_epoch)) { + cdiff <- old_cost - cost + cdiffrc <- cdiff / old_cost + if (length(cdiffrc) > 0 && + opt_stages[opt_stage_idx] == "early") { + if (iter > ee_mon_wait && + length(cdiffrc) > 0 && length(old_cdiffrc) > 0 && + cdiffrc < old_cdiffrc) { + if (ee_mon_buffer < 1) { + stop_early <- TRUE + tsmessage("Stopping early: EE relative change reached maximum") + } else { + ee_mon_buffer <- ee_mon_buffer - 1 + } + } + } + old_cdiffrc <- cdiffrc + } + if (cost < min_cost) { + stop_early <- TRUE + tsmessage("Stopping early: cost fell below min_cost") + } + + if (!nnat(opt$is_terminated) && !is.null(tolval) && + tolval < tol && cost <= old_cost && + (iter > stop_lying_iter + tol_wait || + opt_stages[opt_stage_idx] != "opt")) { + stop_early <- TRUE + tsmessage( + "Stopping early: relative tolerance (", + formatC(tol), + ") met" + ) + } + + # Alternative tolerance grad 2norm doesn't need cost to decrease to stop + # (Use this for certain settings with e.g. 
LargeVis where numerical issues + # can cause the cost function to increase almost negligibly) + g2tolval <- (norm2(opt_res$G)) + if (!nnat(opt$is_terminated) && + !is.null(g2tol) && g2tolval < g2tol && + (iter > stop_lying_iter + tol_wait || + opt_stages[opt_stage_idx] != "opt")) { + stop_early <- TRUE + tsmessage( + "Stopping early: ||G||2 tolerance (", + formatC(g2tol), + ") met" + ) + } + + # Stop current stage early if we aren't making progress + if (stop_early) { + if (opt_stage_idx == length(opt_stages)) { + # run out of optimization stages, so give up + break + } + + opt_stage <- opt_stages[opt_stage_idx] + if (opt_stage == "early") { + # stop early exaggeration and adjust mom_switch_iter accordingly + # Only applies to DBD optimizer + n_low_mom_iters <- mom_switch_iter - stop_lying_iter + stop_lying_iter <- iter + 1 + mom_switch_iter <- stop_lying_iter + n_low_mom_iters + opt$mom_switch_iter <- mom_switch_iter + } + + # we know there is at least one more stage or we would have hit the + # break earlier + next_opt_stage <- opt_stages[opt_stage_idx + 1] + + switch(next_opt_stage, + opt = tsmessage("Proceeding to main optimization stage"), + late = { + n_late_exagg_iters <- max_iter - start_late_lying_iter + start_late_lying_iter <- iter + 1 + max_iter <- start_late_lying_iter + n_late_exagg_iters + tsmessage("Proceeding to late exaggeration stage") + }, + stop("BUG: unknown optimization stage '", next_opt_stage, "'") + ) + } + + if (nnat(opt$is_terminated)) { + break + } + + old_cost <- cost + + # Any special custom epoch stuff + epoch_res <- do_epoch(opt, cost_fn, iter, Y, cost) + opt <- epoch_res$opt + cost_fn <- epoch_res$cost + } + } + + if (opt_stages[opt_stage_idx] == "early") { + cost_fn <- stop_exaggerating(cost_fn, exaggeration_factor) + } + if (opt_stages[opt_stage_idx] == "late") { + cost_fn <- stop_exaggerating(cost_fn, late_exaggeration_factor) + } + + # Recenter before output + Y <- sweep(Y, 2, colMeans(Y)) + res <- ret_value( + Y, + ret_extra, + method, + X, + scale, + Y_init, + iter, + start_time, + cost_fn = cost_fn, + opt_res$G, + perplexity, + itercosts, + stop_lying_iter, + start_late_lying_iter, + opt_list, + opt, + exaggeration_factor, + late_exaggeration_factor, + optionals = ret_optionals, + pca = ifelse(pca && !whiten, initial_dims, 0), + whiten = ifelse(pca && whiten, initial_dims, 0), + use_cpp = use_cpp, + n_threads = n_threads + ) + + res +} + +create_cost <- function(method, + perplexity, + eps, + n_threads, + use_cpp) { + method_names <- c( + "tsne", + "largevis", + "umap", + "tumap", + "ntumap", + "mmds", + "gmmds", + "asne", + "ssne", + "wtsne", + "wssne", + "hssne", + "ee", + "nerv", + "snerv", + "jse", + "sjse", + "smmds", + "sammon", + "tasne", + "trmsne", + "trsrsne", + "tmsne", + "arsrsne", + "rsrjse", + "rsrnerv", + "btsne", + "bssne", + "basne", + "btasne", + "bnerv", + "ballmmds", + "knnmmds", + "dhssne", + "pstsne", + "tsneu", + "skdtsne", + "usne", + "cetsne", + "tee", + "absne", + "chsne", + "hlsne", + "rklsne", + "jssne", + "gsne", + "abssne", + "bhssne" + ) + if (is.character(method)) { + method <- match.arg(tolower(method), method_names) + cost_fn <- switch(method, + tsne = tsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + umap = umap( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + largevis = largevis( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tumap = tumap( + perplexity = perplexity, + n_threads 
= n_threads, + eps = eps, + use_cpp = use_cpp + ), + ntumap = ntumap( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + mmds = mmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + gmmds = gmmds( + k = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + asne = asne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ssne = ssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + wtsne = wtsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + wssne = wssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + hssne = hssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ee = ee( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + nerv = nerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + snerv = snerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + jse = jse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + sjse = sjse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + smmds = smmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + sammon = sammon( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tasne = tasne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + trmsne = trmsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + trsrsne = trsrsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tmsne = tmsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + arsrsne = arsrsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rsrjse = rsrjse( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rsrnerv = rsrnerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + btsne = btsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bssne = bssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + basne = basne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + btasne = btasne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bnerv = bnerv( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + ballmmds = ballmmds( + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + knnmmds = knnmmds( + k = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + dhssne = dhssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tsneu = tsneu( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + pstsne = pstsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + skdtsne = skdtsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + usne = usne( + perplexity = perplexity, + n_threads = n_threads, + eps = 
eps, + use_cpp = use_cpp + ), + cetsne = cetsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + tee = tee( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + absne = absne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + chsne = chsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + hlsne = hlsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + gsne = gsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + rklsne = rklsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + jssne = jssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + abssne = abssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + bhssne = bhssne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + use_cpp = use_cpp + ), + stop("BUG: someone forgot to implement option: '", method, "'") + ) + } else { + if (is.list(method)) { + methodlist <- method + method_name <- methodlist[[1]] + methodlist[[1]] <- NULL + method <- match.arg(tolower(method_name), method_names) + if (exists(method_name)) { + fn <- get(method_name) + } else { + stop("Unknown method: '", method_name, "'") + } + } else if (is.function(method)) { + fn <- method + methodlist <- list() + } else { + stop("Bad method parameter type") + } + + param_names <- names(formals(fn)) + if ("perplexity" %in% param_names) { + methodlist$perplexity <- perplexity } - - # Recenter after each iteration - Y <- sweep(Y, 2, colMeans(Y)) - - if ((epoch > 0 && iter %% epoch == 0) || iter == max_iter) { - stop_early <- FALSE - - cost_eval_res <- cost_eval(cost_fn, Y, opt_res) - cost_fn <- cost_eval_res$cost - cost <- cost_eval_res$value - - if (!is.null(old_cost)) { - tolval <- reltol(cost, old_cost) / epoch - } - - if (verbose) { - tsmessage( - "Iteration #", - iter, - " error: ", - formatC(cost) - , - " ||G||2 = ", - formatC(norm2(opt_res$G)) - , - appendLF = FALSE - ) - if (!is.null(tolval)) { - message(" tol = ", formatC(tolval), appendLF = FALSE) - } - - if (!is.null(opt$counts)) { - message(" nf = ", - opt$counts$fn, - " ng = ", - opt$counts$gr, - appendLF = FALSE) - } - - # special treatment for mize innards - if (!is.null(opt$stages$gradient_descent$step_size$value)) { - opt$stages$gradient_descent$step_size$value - message( - " alpha = ", - formatC(opt$stages$gradient_descent$step_size$value), - appendLF = FALSE - ) - } - - if (!is.null(old_cost) && cost > old_cost) { - message(" !", appendLF = FALSE) - } - - if (!is.null(opt$epoch)) { - opt <- opt$epoch(Y, iter, cost, cost_fn, opt) - } - - message() - utils::flush.console() - } - if (!is.null(epoch_callback)) { - do_callback(epoch_callback, Y, iter, cost, cost_fn, opt) - } - - if (ret_extra) { - names(cost) <- iter - itercosts <- c(itercosts, cost) - } - - # Early stopping tests - if (!is.null(ee_mon_epoch)) { - cdiff <- old_cost - cost - cdiffrc <- cdiff / old_cost - if (length(cdiffrc) > 0 && - opt_stages[opt_stage_idx] == "early") { - if (iter > ee_mon_wait && - length(cdiffrc) > 0 && length(old_cdiffrc) > 0 && - cdiffrc < old_cdiffrc) { - if (ee_mon_buffer < 1) { - stop_early <- TRUE - tsmessage("Stopping early: EE relative change reached maximum") - } - else { - ee_mon_buffer <- ee_mon_buffer - 1 - } - } - } - old_cdiffrc 
<- cdiffrc - } - if (cost < min_cost) { - stop_early <- TRUE - tsmessage("Stopping early: cost fell below min_cost") - } - - if (!nnat(opt$is_terminated) && !is.null(tolval) && - tolval < tol && cost <= old_cost && - (iter > stop_lying_iter + tol_wait - || opt_stages[opt_stage_idx] != "opt")) { - stop_early <- TRUE - tsmessage("Stopping early: relative tolerance (", - formatC(tol), - ") met") - } - - # Alternative tolerance grad 2norm doesn't need cost to decrease to stop - # (Use this for certain settings with e.g. LargeVis where numerical issues - # can cause the cost function to increase almost negligibly) - g2tolval <- (norm2(opt_res$G)) - if (!nnat(opt$is_terminated) && - !is.null(g2tol) && g2tolval < g2tol && - (iter > stop_lying_iter + tol_wait - || opt_stages[opt_stage_idx] != "opt")) { - stop_early <- TRUE - tsmessage("Stopping early: ||G||2 tolerance (", - formatC(g2tol), - ") met") - } - - # Stop current stage early if we aren't making progress - if (stop_early) { - if (opt_stage_idx == length(opt_stages)) { - # run out of optimization stages, so give up - break - } - - opt_stage <- opt_stages[opt_stage_idx] - if (opt_stage == "early") { - # stop early exaggeration and adjust mom_switch_iter accordingly - # Only applies to DBD optimizer - n_low_mom_iters <- mom_switch_iter - stop_lying_iter - stop_lying_iter <- iter + 1 - mom_switch_iter <- stop_lying_iter + n_low_mom_iters - opt$mom_switch_iter <- mom_switch_iter - } - - # we know there is at least one more stage or we would have hit the - # break earlier - next_opt_stage <- opt_stages[opt_stage_idx + 1] - - switch( - next_opt_stage, - opt = tsmessage("Proceeding to main optimization stage"), - late = { - n_late_exagg_iters <- max_iter - start_late_lying_iter - start_late_lying_iter <- iter + 1 - max_iter <- start_late_lying_iter + n_late_exagg_iters - tsmessage("Proceeding to late exaggeration stage") - }, - stop("BUG: unknown optimization stage '", next_opt_stage, "'") - ) - } - - if (nnat(opt$is_terminated)) { - break - } - - old_cost <- cost - - # Any special custom epoch stuff - epoch_res <- do_epoch(opt, cost_fn, iter, Y, cost) - opt <- epoch_res$opt - cost_fn <- epoch_res$cost + if ("k" %in% param_names) { + methodlist$k <- perplexity } + cost_fn <- do.call(fn, methodlist) } - - if (opt_stages[opt_stage_idx] == "early") { - cost_fn <- stop_exaggerating(cost_fn, exaggeration_factor) - } - if (opt_stages[opt_stage_idx] == "late") { - cost_fn <- stop_exaggerating(cost_fn, late_exaggeration_factor) - } - - # Recenter before output - Y <- sweep(Y, 2, colMeans(Y)) - res <- ret_value( - Y, - ret_extra, - method, - X, - scale, - Y_init, - iter, - start_time, - cost_fn = cost_fn, - opt_res$G, - perplexity, - itercosts, - stop_lying_iter, - start_late_lying_iter, - opt_list, - opt, - exaggeration_factor, - late_exaggeration_factor, - optionals = ret_optionals, - pca = ifelse(pca && !whiten, initial_dims, 0), - whiten = ifelse(pca && whiten, initial_dims, 0), - use_cpp = use_cpp, - n_threads = n_threads - ) - - res } + # Result Export ----------------------------------------------------------- # Prepare the return value. 
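# A minimal usage sketch of the return convention handled by ret_value()
# below (illustrative only, assuming the API documented in this file): with
# ret_extra = TRUE or a character vector of extras, a list is returned,
# otherwise just the coordinate matrix:
#
#   res <- smallvis(iris, perplexity = 30, ret_extra = c("DX", "DY"))
#   head(res$Y)    # the embedded coordinates
#   res$itercosts  # cost recorded at each epoch
#   res$DX         # input distances, one of the requested optional extras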
@@ -1795,16 +1793,15 @@ ret_value <- function(Y, attr(Y, "dimnames") <- NULL if (ret_extra) { end_time <- Sys.time() - + if (methods::is(X, "dist")) { N <- attr(X, "Size") origD <- NULL - } - else { + } else { N <- nrow(X) origD <- ncol(X) } - + res <- list( Y = Y, N = N, @@ -1815,52 +1812,50 @@ ret_value <- function(Y, iter = iter, time_secs = as.numeric(end_time - start_time, units = "secs") ) - + if (!is.null(G)) { res$G2norm <- norm2(G) } - + if (pca > 0) { res$pca_dims <- pca - } - else if (whiten > 0) { + } else if (whiten > 0) { res$whiten_dims <- whiten } - + if (is.null(cost_fn$pcost)) { cost_fn <- cost_grad(cost_fn, Y) cost_fn <- cost_point(cost_fn, Y) } res$costs <- cost_fn$pcost - + if (!is.null(opt_input)) { res$opt <- opt_input if (!is.null(opt_res$counts)) { res$opt$counts <- opt_res$counts } } - - + + # Don't report exaggeration settings if they didn't do anything if (exaggeration_factor == 1 || late_exaggeration_factor == 1) { if (exaggeration_factor == 1) { exaggeration_factor <- NULL stop_lying_iter <- NULL } - + if (late_exaggeration_factor == 1) { late_exaggeration_factor <- NULL start_late_lying_iter <- NULL } - } - else { + } else { # Don't report start_late_lying_iter if we never got there if (start_late_lying_iter > iter) { start_late_lying_iter <- NULL late_exaggeration_factor <- NULL } } - + res <- c( res, list( @@ -1872,13 +1867,13 @@ ret_value <- function(Y, start_late_lying_iter = start_late_lying_iter ) ) - + # If using the Intrinsic Dimensionality method, use the chosen perplexity if (!is.null(cost_fn$idp)) { res$perplexity <- cost_fn$idp } - - + + optionals <- tolower(unique(optionals)) for (o in optionals) { exported <- NULL @@ -1889,8 +1884,7 @@ ret_value <- function(Y, if (!is.null(exported)) { if (nchar(o) < 3) { res[[toupper(o)]] <- exported - } - else { + } else { res[[o]] <- exported } } @@ -1898,24 +1892,21 @@ ret_value <- function(Y, else if (o == "dx") { if (methods::is(X, "dist")) { res$DX <- X - } - else { + } else { res$DX <- calc_d(X, use_cpp = use_cpp, n_threads = n_threads) } - } - else if (o == "dy") { + } else if (o == "dy") { res$DY <- calc_d(Y, use_cpp = use_cpp, n_threads = n_threads) } - + if (o == "x") { res$X <- X } } - + res <- remove_nulls(res) res - } - else { + } else { Y } } From 9c67c6774ea9022f37a99f3fb095ad1975b9f922 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sat, 3 Aug 2024 20:44:26 -0700 Subject: [PATCH 21/26] perplexity using 3 * knn --- smallvis/R/RcppExports.R | 4 ++ smallvis/R/perplexity.R | 45 +++++++----- smallvis/src/RcppExports.cpp | 17 +++++ smallvis/src/perplexity.cpp | 129 +++++++++++++++++++++++++++++++++++ 4 files changed, 179 insertions(+), 16 deletions(-) diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R index 3728f9e..d41745b 100644 --- a/smallvis/R/RcppExports.R +++ b/smallvis/R/RcppExports.R @@ -25,6 +25,10 @@ mmds_grad_cpp <- function(R, D, Y, eps, n_threads) { .Call(`_smallvis_mmds_grad_cpp`, R, D, Y, eps, n_threads) } +find_beta_knn_cpp <- function(knn_distances, knn_indices, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) { + .Call(`_smallvis_find_beta_knn_cpp`, knn_distances, knn_indices, perplexity, tol, max_tries, n_threads) +} + find_beta_cpp <- function(X, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) { .Call(`_smallvis_find_beta_cpp`, X, perplexity, tol, max_tries, n_threads) } diff --git a/smallvis/R/perplexity.R b/smallvis/R/perplexity.R index d2e59f1..627c8aa 100644 --- a/smallvis/R/perplexity.R +++ b/smallvis/R/perplexity.R @@ -488,7 
+488,7 @@ sne_init <- function(cost, row_normalize = TRUE, normalize = TRUE, n_threads = 0, - use_cpp = use_cpp, + use_cpp = FALSE, verbose = FALSE, ret_extra = c()) { if (tolower(kernel) == "knn") { @@ -559,21 +559,34 @@ sne_init <- function(cost, if (!is.numeric(perplexity)) { stop("Unknown perplexity method, '", perplexity[[1]], "'") } - tsmessage( - "Commencing calibration for perplexity = ", - format_perps(perplexity) - ) - if (use_cpp) { - P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W - } else { - x2ares <- x2aff( - X, - perplexity, - tol = 1e-5, - kernel = kernel, - verbose = verbose - ) - P <- x2ares$W + if (kernel == "perpknn") { + k <- 3 * perplexity + if (k > nrow(X) - 1) { + warning("Perplexity probably too high for number of points,", + " result may not be meaningful") + k <- nrow(X) - 1 + } + tsmessage("Commencing calibration for perplexity = ", + format_perps(perplexity), " with k = ", k) + knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) + knn_dist <- knn$dist[, 2:(k + 1)] + knn_idx <- knn$idx[, 2:(k + 1)] + P <- find_beta_knn_cpp(knn_dist, knn_idx, perplexity = perplexity, + n_threads = n_threads)$P + } + else { + tsmessage("Commencing calibration for perplexity = ", + format_perps(perplexity)) + if (use_cpp) { + P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W + } else { + x2ares <- x2aff(X, + perplexity, + tol = 1e-5, + kernel = kernel, + verbose = verbose) + P <- x2ares$W + } } } diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp index 93960dc..ef11c3b 100644 --- a/smallvis/src/RcppExports.cpp +++ b/smallvis/src/RcppExports.cpp @@ -88,6 +88,22 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// find_beta_knn_cpp +List find_beta_knn_cpp(const NumericMatrix& knn_distances, const IntegerMatrix& knn_indices, double perplexity, double tol, int max_tries, std::size_t n_threads); +RcppExport SEXP _smallvis_find_beta_knn_cpp(SEXP knn_distancesSEXP, SEXP knn_indicesSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP n_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const NumericMatrix& >::type knn_distances(knn_distancesSEXP); + Rcpp::traits::input_parameter< const IntegerMatrix& >::type knn_indices(knn_indicesSEXP); + Rcpp::traits::input_parameter< double >::type perplexity(perplexitySEXP); + Rcpp::traits::input_parameter< double >::type tol(tolSEXP); + Rcpp::traits::input_parameter< int >::type max_tries(max_triesSEXP); + Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(find_beta_knn_cpp(knn_distances, knn_indices, perplexity, tol, max_tries, n_threads)); + return rcpp_result_gen; +END_RCPP +} // find_beta_cpp List find_beta_cpp(const NumericMatrix& X, double perplexity, double tol, int max_tries, std::size_t n_threads); RcppExport SEXP _smallvis_find_beta_cpp(SEXP XSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP n_threadsSEXP) { @@ -111,6 +127,7 @@ static const R_CallMethodDef CallEntries[] = { {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2}, {"_smallvis_tsne_grad_cpp", (DL_FUNC) &_smallvis_tsne_grad_cpp, 5}, {"_smallvis_mmds_grad_cpp", (DL_FUNC) &_smallvis_mmds_grad_cpp, 5}, + {"_smallvis_find_beta_knn_cpp", (DL_FUNC) &_smallvis_find_beta_knn_cpp, 6}, {"_smallvis_find_beta_cpp", (DL_FUNC) &_smallvis_find_beta_cpp, 5}, {NULL, NULL, 0} }; diff --git a/smallvis/src/perplexity.cpp 
b/smallvis/src/perplexity.cpp
index b261897..af7c85d 100644
--- a/smallvis/src/perplexity.cpp
+++ b/smallvis/src/perplexity.cpp
@@ -9,6 +9,83 @@
 using namespace Rcpp;
 
+void find_beta_knn(std::vector<double> &knn_distances, std::size_t n,
+                   std::size_t k, double perplexity, double logU, double tol,
+                   int max_tries, std::vector<double> &W,
+                   std::vector<double> &beta, int &bad_perp,
+                   std::size_t start_row, std::size_t end_row) {
+
+  for (std::size_t i = start_row; i < end_row; ++i) {
+    const std::size_t idx = i * k;
+    double betamin = -std::numeric_limits<double>::infinity();
+    double betamax = std::numeric_limits<double>::infinity();
+
+    // Square the distances
+    for (std::size_t j = 0; j < k; ++j) {
+      knn_distances[idx + j] *= knn_distances[idx + j];
+    }
+
+    // Initial guess for beta: 0.5 * perplexity / mean(knn_distances)
+    double sum_d2i = std::accumulate(knn_distances.begin() + idx,
+                                     knn_distances.begin() + idx + k, 0.0);
+    beta[i] = (0.5 * perplexity * k) / sum_d2i;
+
+    double Z = 0.0;
+    double entropy = 0.0;
+    for (std::size_t j = 0; j < k; ++j) {
+      W[idx + j] = exp(-knn_distances[idx + j] * beta[i]);
+      entropy += knn_distances[idx + j] * W[idx + j];
+      Z += W[idx + j];
+    }
+    if (Z == 0.0) {
+      entropy = 0.0;
+    } else {
+      entropy = (entropy / Z) * beta[i] + log(Z);
+    }
+
+    double Hdiff = entropy - logU;
+    int tries = 0;
+    while (fabs(Hdiff) > tol && tries < max_tries) {
+      if (Hdiff > 0) {
+        betamin = beta[i];
+        if (std::isinf(betamax)) {
+          beta[i] *= 2;
+        } else {
+          beta[i] = (beta[i] + betamax) / 2;
+        }
+      } else {
+        betamax = beta[i];
+        if (std::isinf(betamin)) {
+          beta[i] /= 2;
+        } else {
+          beta[i] = (beta[i] + betamin) / 2;
+        }
+      }
+
+      Z = 0.0;
+      entropy = 0.0;
+      for (std::size_t j = 0; j < k; ++j) {
+        W[idx + j] = exp(-knn_distances[idx + j] * beta[i]);
+        entropy += knn_distances[idx + j] * W[idx + j];
+        Z += W[idx + j];
+      }
+      if (Z == 0.0) {
+        entropy = 0.0;
+      } else {
+        entropy = (entropy / Z) * beta[i] + log(Z);
+      }
+
+      Hdiff = entropy - logU;
+      tries++;
+    }
+
+    if (fabs(Hdiff) > tol) {
+      bad_perp++;
+      std::fill(W.begin() + idx, W.begin() + idx + k, 1.0 / k);
+    }
+  }
+}
+
 void find_beta(const std::vector<double> &data, std::size_t n, std::size_t d,
                double perplexity, double logU, double tol, int max_tries,
                std::vector<double> &W, std::vector<double> &beta,
                int &bad_perp,
@@ -114,6 +191,58 @@ void find_beta(const std::vector<double> &data, std::size_t n, std::size_t d,
   }
 }
 
+// [[Rcpp::export]]
+List find_beta_knn_cpp(const NumericMatrix &knn_distances,
+                       const IntegerMatrix &knn_indices, double perplexity = 15,
+                       double tol = 1e-5, int max_tries = 50,
+                       std::size_t n_threads = 1) {
+
+  const std::size_t n = knn_distances.nrow();
+  const std::size_t k = knn_distances.ncol();
+  const double logU = log(perplexity);
+
+  // Flatten the knn_distances matrix into a vector
+  std::vector<double> knn_distances_vec(n * k);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < k; ++j) {
+      knn_distances_vec[i * k + j] = knn_distances(i, j);
+    }
+  }
+
+  std::vector<double> W(n * k, 0.0);
+  std::vector<double> beta(n, 0.0);
+  int bad_perp = 0;
+
+  if (n_threads > 1) {
+    std::size_t chunk_size = (n + n_threads - 1) / n_threads;
+    std::vector<std::thread> threads;
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start_row = t * chunk_size;
+      std::size_t end_row = std::min(start_row + chunk_size, n);
+      threads.emplace_back(find_beta_knn, std::ref(knn_distances_vec), n, k,
+                           perplexity, logU, tol, max_tries, std::ref(W),
+                           std::ref(beta), std::ref(bad_perp), start_row,
+                           end_row);
+    }
+    for (auto &thread : threads) {
+      thread.join();
+    }
+  } else {
+    find_beta_knn(knn_distances_vec, n, k, perplexity, logU, tol, max_tries, W,
+                  beta, bad_perp, 0, n);
+  }
+
+  NumericMatrix P(n, n);
+  for (std::size_t i = 0; i < n; ++i) {
+    for (std::size_t j = 0; j < k; ++j) {
+      P(i, knn_indices(i, j) - 1) = W[i * k + j];
+    }
+  }
+
+  return List::create(Named("P") = P, Named("beta") = beta,
+                      Named("bad_perp") = bad_perp);
+}
+
 // [[Rcpp::export]]
 List find_beta_cpp(const NumericMatrix &X, double perplexity = 15,
                    double tol = 1e-5, int max_tries = 50,

From e84cc1cbbb9754765bb6aa03089c379310ca469f Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sat, 3 Aug 2024 21:50:47 -0700
Subject: [PATCH 22/26] unroll loops we know are 2D

---
 smallvis/src/gradients.cpp | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/smallvis/src/gradients.cpp b/smallvis/src/gradients.cpp
index 055202f..b0e4062 100644
--- a/smallvis/src/gradients.cpp
+++ b/smallvis/src/gradients.cpp
@@ -31,12 +31,12 @@ void mmds_grad(const std::vector<double> &R, const std::vector<double> &D,
 void tsne_grad(const std::vector<double> &P, const std::vector<double> &W,
                double Z, const std::vector<double> &Y,
                std::vector<double> &gradient, std::size_t start,
-               std::size_t end, std::size_t n, std::size_t d) {
+               std::size_t end, std::size_t n) {
   const double Z4 = 4.0 / Z;
 
   for (std::size_t i = start; i < end; ++i) {
-    const std::size_t i_d = i * d;
+    const std::size_t i_d = i + i;
     const std::size_t i_n = i * n;
     for (std::size_t j = 0; j < n; ++j) {
       if (i == j) {
@@ -44,9 +44,8 @@ void tsne_grad(const std::vector<double> &P, const std::vector<double> &W,
       }
       const std::size_t ij = i_n + j;
       double k_ij = Z4 * W[ij] * (Z * P[ij] - W[ij]);
-      for (std::size_t k = 0; k < d; ++k) {
-        gradient[i_d + k] += k_ij * (Y[i_d + k] - Y[j * d + k]);
-      }
+      gradient[i_d] += k_ij * (Y[i_d] - Y[j * 2]);
+      gradient[i_d + 1] += k_ij * (Y[i_d + 1] - Y[j * 2 + 1]);
     }
   }
 }
@@ -262,19 +261,17 @@ NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
                             const NumericMatrix &Y,
                             std::size_t n_threads) {
   std::size_t n = Y.nrow();
-  std::size_t d = Y.ncol();
 
   std::vector<double> P_vec(P.begin(), P.end());
   std::vector<double> W_vec(W.begin(), W.end());
 
-  std::vector<double> Y_vec(n * d);
+  std::vector<double> Y_vec(n * 2);
   for (std::size_t i = 0; i < n; ++i) {
-    for (std::size_t j = 0; j < d; ++j) {
-      Y_vec[i * d + j] = Y(i, j);
-    }
+    Y_vec[i * 2] = Y(i, 0);
+    Y_vec[i * 2 + 1] = Y(i, 1);
   }
 
-  std::vector<double> gradient_vec(n * d, 0.0);
+  std::vector<double> gradient_vec(n * 2, 0.0);
 
   if (n_threads > 1) {
     std::size_t chunk_size = (n + n_threads - 1) / n_threads;
@@ -284,20 +281,19 @@ NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
       std::size_t end_row = std::min(start_row + chunk_size, n);
       threads.emplace_back(tsne_grad, std::cref(P_vec), std::cref(W_vec), Z,
                            std::cref(Y_vec), std::ref(gradient_vec), start_row,
-                           end_row, n, d);
+                           end_row, n);
     }
     for (auto &thread : threads) {
      thread.join();
     }
   } else {
-    tsne_grad(P_vec, W_vec, Z, Y_vec, gradient_vec, 0, n, n, d);
+    tsne_grad(P_vec, W_vec, Z, Y_vec, gradient_vec, 0, n, n);
   }
 
-  NumericMatrix gradient(n, d);
+  NumericMatrix gradient(n, 2);
   for (std::size_t i = 0; i < n; ++i) {
-    for (std::size_t j = 0; j < d; ++j) {
-      gradient(i, j) = gradient_vec[i * d + j];
-    }
+    gradient(i, 0) = gradient_vec[i * 2];
+    gradient(i, 1) = gradient_vec[i * 2 + 1];
   }
 
   return gradient;

From 3023efbf3000ffbf1e44d246dce4bcce6392b305 Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sun, 11 Aug 2024 20:55:00 -0700
Subject: [PATCH 23/26] Add Barnes-Hut, irlba PCA, perplexity on neighbors

---
 smallvis/R/RcppExports.R | 12 +-
 smallvis/R/cost.R | 2 +-
 smallvis/R/init.R | 46 +++++-
smallvis/R/perplexity.R | 186 +++++++++----- smallvis/R/smallvis.R | 23 +- smallvis/R/sne.R | 70 +++++ smallvis/man/smallvis.Rd | 100 ++++---- smallvis/man/smallvis_perpstep.Rd | 5 +- smallvis/man/smallvis_rep.Rd | 15 +- smallvis/src/RcppExports.cpp | 45 +++- smallvis/src/bh.cpp | 40 +++ smallvis/src/bh.h | 369 +++++++++++++++++++++++++++ smallvis/src/gradients.cpp | 39 ++- smallvis/src/perplexity.cpp | 22 +- smallvis/src/threads.h | 65 +++++ smallvis/tests/testthat/helper_api.R | 4 +- smallvis/tests/testthat/test_api.R | 12 +- 17 files changed, 882 insertions(+), 173 deletions(-) create mode 100644 smallvis/src/bh.cpp create mode 100644 smallvis/src/bh.h create mode 100644 smallvis/src/threads.h diff --git a/smallvis/R/RcppExports.R b/smallvis/R/RcppExports.R index d41745b..f96ef06 100644 --- a/smallvis/R/RcppExports.R +++ b/smallvis/R/RcppExports.R @@ -1,6 +1,14 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +bh_tsne_gradient_cpp <- function(indices, indptr, P_data, embedding, theta = 0.5, eps = 1e-16, n_threads = 1L) { + .Call(`_smallvis_bh_tsne_gradient_cpp`, indices, indptr, P_data, embedding, theta, eps, n_threads) +} + +bh_plogq_cpp <- function(indices, indptr, P_data, embedding, theta = 0.5, eps = 1e-16, n_threads = 1L) { + .Call(`_smallvis_bh_plogq_cpp`, indices, indptr, P_data, embedding, theta, eps, n_threads) +} + dist2_cpp <- function(input, n_threads = 1L) { .Call(`_smallvis_dist2_cpp`, input, n_threads) } @@ -25,8 +33,8 @@ mmds_grad_cpp <- function(R, D, Y, eps, n_threads) { .Call(`_smallvis_mmds_grad_cpp`, R, D, Y, eps, n_threads) } -find_beta_knn_cpp <- function(knn_distances, knn_indices, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) { - .Call(`_smallvis_find_beta_knn_cpp`, knn_distances, knn_indices, perplexity, tol, max_tries, n_threads) +find_beta_knn_cpp <- function(knn_distances, knn_indices, perplexity = 15, tol = 1e-5, max_tries = 50L, ret_sparse = FALSE, n_threads = 1L) { + .Call(`_smallvis_find_beta_knn_cpp`, knn_distances, knn_indices, perplexity, tol, max_tries, ret_sparse, n_threads) } find_beta_cpp <- function(X, perplexity = 15, tol = 1e-5, max_tries = 50L, n_threads = 1L) { diff --git a/smallvis/R/cost.R b/smallvis/R/cost.R index de9219c..810d696 100644 --- a/smallvis/R/cost.R +++ b/smallvis/R/cost.R @@ -140,7 +140,7 @@ cost_point <- function(cost, Y) { cost_clear <- function(cost) { if (!is.null(cost$sentinel)) { cost[[cost$sentinel]] <- NULL - } else if (is.null(cost$clear)) { + } else if (!is.null(cost$clear)) { cost <- cost$clear(cost) } cost diff --git a/smallvis/R/init.R b/smallvis/R/init.R index 2272da4..35c9f57 100644 --- a/smallvis/R/init.R +++ b/smallvis/R/init.R @@ -54,7 +54,8 @@ laplacian_eigenmap <- function(A, # This effectively row-normalizes A: colSums is normally faster than rowSums # and because A is symmetric, they're equivalent M <- A / colSums(A) - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { + if (use_RSpectra && + requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { tsmessage("Using RSpectra for eigenvectors") Re(RSpectra::eigs(M, k = ndim + 1)$vectors[, 2:(ndim + 1)]) } else { @@ -78,7 +79,8 @@ normalized_spectral_init <- function(A, L <- -t(A / Dsq) / Dsq diag(L) <- 1 + diag(L) - if (use_RSpectra && requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { + if (use_RSpectra && + requireNamespace("RSpectra", quietly = TRUE, warn.conflicts = FALSE)) { 
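      # A minimal sketch of the decomposition this branch performs (assumed
      # shape only; the actual call below tunes ncv before invoking RSpectra):
      # take the smallest eigenvectors of the normalized Laplacian L and drop
      # the trivial first one, e.g.
      #   e <- RSpectra::eigs_sym(L, k = ndim + 1, which = "SM")
      #   coords <- e$vectors[, 2:(ndim + 1)]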
tsmessage("Using RSpectra for eigenvectors") k <- ndim + 1 ncv <- max(2 * k + 1, floor(sqrt(n))) @@ -143,23 +145,43 @@ pca_scores <- function(X, } scores <- res_mds$points } else { - X <- scale(X, center = TRUE, scale = FALSE) - # do SVD on X directly rather than forming covariance matrix - s <- svd(X, nu = ncol, nv = 0) - D <- diag(c(s$d[1:ncol])) - if (verbose || ret_extra) { - # calculate eigenvalues of covariance matrix from singular values - lambda <- (s$d^2) / (nrow(X) - 1) - varex <- sum(lambda[1:ncol]) / sum(lambda) + if (ncol < 0.5 * min(dim(X))) { + res <- irlba::prcomp_irlba( + X, + n = ncol, + retx = TRUE, + center = TRUE, + scale = FALSE + ) + scores <- res$x + ncol <- ncol(res$rotation) + varex <- sum(res$sdev[1:ncol]^2) / res$totalvar tsmessage( - "PCA: ", + "PCA (via irlba): ", ncol, " components explained ", formatC(varex * 100), "% variance" ) + } else { + X <- scale(X, center = TRUE, scale = FALSE) + # do SVD on X directly rather than forming covariance matrix + s <- svd(X, nu = ncol, nv = 0) + D <- diag(c(s$d[1:ncol])) + if (verbose || ret_extra) { + # calculate eigenvalues of covariance matrix from singular values + lambda <- (s$d^2) / (nrow(X) - 1) + varex <- sum(lambda[1:ncol]) / sum(lambda) + tsmessage( + "PCA (via SVD): ", + ncol, + " components explained ", + formatC(varex * 100), + "% variance" + ) + } + scores <- s$u %*% D } - scores <- s$u %*% D } if (ret_extra) { diff --git a/smallvis/R/perplexity.R b/smallvis/R/perplexity.R index 627c8aa..e2aaa29 100644 --- a/smallvis/R/perplexity.R +++ b/smallvis/R/perplexity.R @@ -455,20 +455,19 @@ scale_affinities <- function(P, # row normalization before anything else if (nnat(row_normalize)) { if (symmetrize == "rowsymm") { - P <- 0.5 * (P + t(P)) + P <- 0.5 * (P + Matrix::t(P)) symmetrize <- "none" } - P <- P / rowSums(P) + P <- P / Matrix::rowSums(P) } else if (is.numeric(row_normalize)) { - P <- row_normalize * P / rowSums(P) + P <- row_normalize * P / Matrix::rowSums(P) } - # Symmetrize P <- switch(symmetrize, none = P, - symmetric = 0.5 * (P + t(P)), - average = 0.5 * (P + t(P)), - mutual = sqrt(P * t(P)), + symmetric = 0.5 * (P + Matrix::t(P)), + average = 0.5 * (P + Matrix::t(P)), + mutual = sqrt(P * Matrix::t(P)), umap = fuzzy_set_union(P), fuzzy = fuzzy_set_union(P), stop("unknown symmetrization: ", symmetrize) @@ -559,32 +558,89 @@ sne_init <- function(cost, if (!is.numeric(perplexity)) { stop("Unknown perplexity method, '", perplexity[[1]], "'") } - if (kernel == "perpknn") { + + # perpnn options: + # perpnnks: use exact knn with sparse output + # perpnnas: use approximate knn with sparse output + # perpnnkd: use exact knn with dense output + # perpnnad: use approximate knn with dense output + if (startsWith(kernel, "perpnn")) { + n <- nrow(X) + if (perplexity > n - 1) { + stop("Perplexity too high for number of points") + } k <- 3 * perplexity - if (k > nrow(X) - 1) { - warning("Perplexity probably too high for number of points,", - " result may not be meaningful") - k <- nrow(X) - 1 + if (k > n - 1) { + warning( + "Perplexity probably too high for number of points,", + " result may not be meaningful" + ) + k <- n - 1 + } + if (kernel %in% c("perpnnks", "perpnnkd")) { + tsmessage( + "Finding exact nearest neighbors with k = ", + k, + " n_threads = ", + n_threads + ) + knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) + } else { + tsmessage( + "Finding approximate nearest neighbors with k = ", + k, + " n_threads = ", + n_threads + ) + knn <- rnndescent::rnnd_knn(X, k = k + 1, n_threads = 
n_threads) } - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity), " with k = ", k) - knn <- rnndescent::brute_force_knn(X, k = k + 1, n_threads = n_threads) knn_dist <- knn$dist[, 2:(k + 1)] knn_idx <- knn$idx[, 2:(k + 1)] - P <- find_beta_knn_cpp(knn_dist, knn_idx, perplexity = perplexity, - n_threads = n_threads)$P - } - else { - tsmessage("Commencing calibration for perplexity = ", - format_perps(perplexity)) + + ret_sparse <- kernel %in% c("perpnnks", "perpnnas") + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity), + " n_threads = ", + n_threads + ) + P <- find_beta_knn_cpp( + knn_dist, + knn_idx, + perplexity = perplexity, + n_threads = n_threads, + ret_sparse = ret_sparse + )$P + if (ret_sparse) { + P <- Matrix::sparseMatrix( + i = rep(1:n, each = k), + j = as.vector(t(knn_idx)), + x = P, + dims = c(n, n), + repr = "C" + ) + } + } else { if (use_cpp) { + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity), + " n_threads = ", + n_threads + ) P <- find_beta_cpp(X, perplexity, tol = 1e-5, n_threads = n_threads)$W } else { - x2ares <- x2aff(X, - perplexity, - tol = 1e-5, - kernel = kernel, - verbose = verbose) + tsmessage( + "Commencing calibration for perplexity = ", + format_perps(perplexity) + ) + x2ares <- x2aff( + X, + perplexity, + tol = 1e-5, + kernel = kernel, + verbose = verbose + ) P <- x2ares$W } } @@ -598,46 +654,48 @@ sne_init <- function(cost, ) cost$P <- P - if (is.logical(row_normalize)) { - tsmessage( - "Effective perplexity of P approx = ", - formatC(stats::median(perpp(P))) - ) - } + if (!methods::is(P, "sparseMatrix")) { + if (is.logical(row_normalize)) { + tsmessage( + "Effective perplexity of P approx = ", + formatC(stats::median(perpp(P))) + ) + } - for (r in unique(tolower(ret_extra))) { - switch(r, - v = { - cost$V <- x2ares$W - }, - dint = { - if (!is.null(x2ares$dint)) { - cost$dint <- x2ares$dint + for (r in unique(tolower(ret_extra))) { + switch(r, + v = { + cost$V <- x2ares$W + }, + dint = { + if (!is.null(x2ares$dint)) { + cost$dint <- x2ares$dint + } + }, + beta = { + if (!is.null(x2ares$beta)) { + cost$beta <- x2ares$beta + } + }, + adegc = { + cost$adegc <- 0.5 * rowSums(x2ares$W) + colSums(x2ares$W) + }, + adegin = { + cost$adegin <- rowSums(x2ares$W) + }, + adegout = { + cost$adegout <- colSums(x2ares$W) + }, + pdeg = { + cost$pdeg <- colSums(P) + }, + idp = { + if (!is.null(x2ares$idp)) { + cost$idp <- x2ares$idp + } } - }, - beta = { - if (!is.null(x2ares$beta)) { - cost$beta <- x2ares$beta - } - }, - adegc = { - cost$adegc <- 0.5 * rowSums(x2ares$W) + colSums(x2ares$W) - }, - adegin = { - cost$adegin <- rowSums(x2ares$W) - }, - adegout = { - cost$adegout <- colSums(x2ares$W) - }, - pdeg = { - cost$pdeg <- colSums(P) - }, - idp = { - if (!is.null(x2ares$idp)) { - cost$idp <- x2ares$idp - } - } - ) + ) + } } cost } diff --git a/smallvis/R/smallvis.R b/smallvis/R/smallvis.R index 0c5b12e..e2c93e7 100644 --- a/smallvis/R/smallvis.R +++ b/smallvis/R/smallvis.R @@ -8,6 +8,7 @@ #' \itemize{ #' \item \code{"tsne"} t-Distributed Stochastic Neighbor Embedding #' (van der Maaten and Hinton, 2008). +#' \item \code{"bhtsne"} Barnes-Hut t-SNE (van der Maaten, 2014). #' \item \code{"largevis"} the cost function of the LargeVis algorithm #' (Tang et al, 2016). Input affinities are calculated and symmetrized using #' the same perplexity calibration method as t-SNE, but are not normalized. 
@@ -535,6 +536,8 @@ #' \code{.Machine$double.eps}, but if you see inconsistent convergence results #' with optimizer that should be reducing the cost each iteration, then try #' setting this to a larger value, e.g. between \code{1e-3 - 1e-9}. +#' @param theta Barnes-Hut approximation accuracy. Default is 0.5. Set to 0.0 +#' for exact t-SNE. Applies only for \code{method = "bhtsne"}. #' @param verbose If \code{TRUE}, log progress messages to the console. #' @return If \code{ret_extra} is \code{FALSE}, the embedded output coordinates #' as a matrix. Otherwise, a list with the following items: @@ -807,6 +810,11 @@ #' \emph{Journal of Machine Learning Research}, \emph{9} (2579-2605). #' \url{http://www.jmlr.org/papers/v9/vandermaaten08a.html} #' +#' Van der Maaten, L. (2014). +#' Accelerating t-SNE using tree-based algorithms. +#' \emph{Journal of Machine Learning Research}, \emph{15}(1) (3221-3245). +#' \url{https://jmlr.org/papers/v15/vandermaaten14a.html} +#' #' Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010). #' Information retrieval perspective to nonlinear dimensionality reduction for #' data visualization. @@ -871,6 +879,7 @@ smallvis <- function(X, ret_extra = FALSE, n_threads = 0, use_cpp = FALSE, + theta = 0.5, eps = .Machine$double.eps, verbose = TRUE) { if (is.logical(epoch_callback)) { @@ -884,7 +893,7 @@ smallvis <- function(X, } # The embedding method - cost_fn <- create_cost(method, perplexity, eps, n_threads, use_cpp) + cost_fn <- create_cost(method, perplexity, eps, n_threads, use_cpp, theta = theta) if (exaggeration_factor != 1) { if (stop_lying_iter < 1) { @@ -1389,7 +1398,8 @@ create_cost <- function(method, perplexity, eps, n_threads, - use_cpp) { + use_cpp, + theta) { method_names <- c( "tsne", "largevis", @@ -1438,7 +1448,8 @@ create_cost <- function(method, "jssne", "gsne", "abssne", - "bhssne" + "bhssne", + "bhtsne" ) if (is.character(method)) { method <- match.arg(tolower(method), method_names) @@ -1449,6 +1460,12 @@ create_cost <- function(method, eps = eps, use_cpp = use_cpp ), + bhtsne = bhtsne( + perplexity = perplexity, + n_threads = n_threads, + eps = eps, + theta = theta + ), umap = umap( perplexity = perplexity, n_threads = n_threads, diff --git a/smallvis/R/sne.R b/smallvis/R/sne.R index e6acda2..468cea9 100644 --- a/smallvis/R/sne.R +++ b/smallvis/R/sne.R @@ -79,6 +79,76 @@ tsne <- function(perplexity, ) } +bhtsne <- function(perplexity, + inp_kernel = "perpnnas", + eps = .Machine$double.eps, + theta = 0.5, + n_threads = 0) { + list( + init = function(cost, + X, + max_iter, + verbose = FALSE, + ret_extra = c()) { + cost <- sne_init( + cost, + X, + perplexity = perplexity, + kernel = inp_kernel, + symmetrize = "symmetric", + normalize = TRUE, + row_normalize = TRUE, + verbose = verbose, + ret_extra = ret_extra, + n_threads = n_threads, + use_cpp = TRUE + ) + cost$eps <- eps + cost + }, + cache_input = function(cost) { + cost$plogp <- sum(cost$P@x * log(cost$P@x)) + cost + }, + pfn = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$pcost <- cost$plogp - bh_plogq_cpp(cost$P@i, + cost$P@p, + cost$P@x, + Y, + theta = theta, + n_threads = n_threads + ) + # browser() + cost + }, + gr = function(cost, Y) { + cost <- cost_update(cost, Y) + cost$G <- bh_tsne_gradient_cpp(cost$P@i, + cost$P@p, + cost$P@x, + Y, + theta = theta, + n_threads = n_threads + ) + # browser() + cost$G <- 4 * cost$G + cost + }, + export = function(cost, val) { + res <- cost_export(cost, val) + res + }, + update = function(cost, Y) { + cost + }, + exaggerate = 
function(cost, exaggeration_factor) {
+      cost$P <- cost$P * exaggeration_factor
+      cost
+    }
+  )
+}
+
 # Cook, J., Sutskever, I., Mnih, A., & Hinton, G. E. (2007).
 # Visualizing similarity data with a mixture of maps.
 # In \emph{International Conference on Artificial Intelligence and Statistics} (pp. 67-74).
diff --git a/smallvis/man/smallvis.Rd b/smallvis/man/smallvis.Rd
index 284de3c..1991e0f 100644
--- a/smallvis/man/smallvis.Rd
+++ b/smallvis/man/smallvis.Rd
@@ -38,6 +38,7 @@ smallvis(
   ret_extra = FALSE,
   n_threads = 0,
   use_cpp = FALSE,
+  theta = 0.5,
   eps = .Machine$double.eps,
   verbose = TRUE
 )
@@ -65,7 +66,7 @@ standard deviation. See 'Output initialization' section.}
 \item{perplexity}{The target perplexity for parameterizing the input
 probabilities. For method \code{"umap"}, controls the neighborhood size for
 parameterizing the smoothed k-nearest neighbor distances. See also the
-'Intrinsic dimensionality perplexity' and 'Multiscale perplexities' 
+'Intrinsic dimensionality perplexity' and 'Multiscale perplexities'
 sections.}

 \item{max_iter}{Maximum number of iterations in the optimization.}
@@ -114,24 +115,24 @@ this case, use the gradient norm to stop early.}

 \item{momentum}{Initial momentum value.}

-\item{final_momentum}{Final momentum value. If 
+\item{final_momentum}{Final momentum value. If
 \code{late_exaggeration_factor > 1}, then during late exaggeration, the
 momentum is switched back to \code{momentum} from this value.}

 \item{mom_switch_iter}{Iteration at which the momentum will switch from
-\code{momentum} to \code{final_momentum}. If 
+\code{momentum} to \code{final_momentum}. If
 \code{exaggeration_factor > 1}, then this should occur at some point after
 \code{stop_lying_iter} (default is 150 iterations after). If the early
 exaggeration phase stops early, this value is treated as being relative to
 when early exaggeration stops, to avoid wasting iterations at the lower
 momentum value. For example, if \code{stop_lying_iter = 100} and
 \code{mom_switch_iter = 250} (the defaults), but early exaggeration
-converges at iteration 50, the switch iteration will occur at iteration 
+converges at iteration 50, the switch iteration will occur at iteration
 150.}

 \item{eta}{Learning rate value, a positive number. Or set to \code{"optsne"},
 to use the formula suggested by Belkina and co-workers (2018) in their
-opt-SNE package (the size of the dataset divided by the 
+opt-SNE package (the size of the dataset divided by the
 \code{exaggeration_factor}).}

 \item{min_gain}{Minimum gradient descent step size.}
@@ -179,8 +180,8 @@ number of iterations before monitoring the relative rate of change of the
 cost function during early exaggeration.}

 \item{ee_mon_buffer}{If \code{ee_mon_epoch} is non-\code{NULL}, then ignore
-this number of occurences of the relative rate of change of the cost 
-function decreasing, which would otherwise signal termination of 
+this number of occurrences of the relative rate of change of the cost
+function decreasing, which would otherwise signal termination of
 the early exaggeration stage. This is to prevent erroneous termination of
 early exaggeration under conditions when the cost can fluctuate noisily.}

@@ -197,7 +198,7 @@ to those value which are returned when this value is \code{TRUE}. See the

 \item{n_threads}{Number of threads to use in multi-threaded code. Default is
 0, which means no multi-threading.
Mainly affects the calculation of things -like distance matrices and perplexity calibration if you set +like distance matrices and perplexity calibration if you set \code{use_cpp = TRUE}. Otherwise, only methods that need to calculate nearest neighbors will be affected.} @@ -205,6 +206,9 @@ nearest neighbors will be affected.} calculations. Default is \code{FALSE}. This won't speed up all steps and you will want to use this in conjunction with \code{n_threads}.} +\item{theta}{Barnes-Hut approximation accuracy. Default is 0.5. Set to 0.0 +for exact t-SNE. Applies only for \code{method = "bhtsne"}.} + \item{eps}{Set epsilon for avoiding division-by-zero errors. Default is \code{.Machine$double.eps}, but if you see inconsistent convergence results with optimizer that should be reducing the cost each iteration, then try @@ -300,6 +304,7 @@ to the \code{method} parameter are: \itemize{ \item \code{"tsne"} t-Distributed Stochastic Neighbor Embedding (van der Maaten and Hinton, 2008). + \item \code{"bhtsne"} Barnes-Hut t-SNE (van der Maaten, 2014). \item \code{"largevis"} the cost function of the LargeVis algorithm (Tang et al, 2016). Input affinities are calculated and symmetrized using the same perplexity calibration method as t-SNE, but are not normalized. @@ -339,17 +344,17 @@ to the \code{method} parameter are: co-workers, use method \code{"bnerv"}. \item \code{"jse"} The Jensen-Shannon Embedding method of Lee and co-workers (2013). - \item \code{"absne"} The alpha-beta SNE method of Narayan and co-workers + \item \code{"absne"} The alpha-beta SNE method of Narayan and co-workers (2015). - \item \code{"chsne"} The chi-squared divergence version of t-SNE + \item \code{"chsne"} The chi-squared divergence version of t-SNE (Im and co-workers, 2018). - \item \code{"hlsne"} The Hellinger distance divergence version of t-SNE + \item \code{"hlsne"} The Hellinger distance divergence version of t-SNE (Im and co-workers, 2018). \item \code{"rklsne"} The reverse Kullback-Leibler divergence version of t-SNE (Im and co-workers, 2018). \item \code{"jssne"} The Jensen-Shannon divergence version of t-SNE (Im and co-workers, 2018). - \item \code{"gsne"}, The global SNE (g-SNE) method of Zhou and Sharpee + \item \code{"gsne"}, The global SNE (g-SNE) method of Zhou and Sharpee (2018). } @@ -381,9 +386,9 @@ Some parameters are available for all (nor nearly all) methods. sparsification is carried out with this kernel, so there are no memory or performance improvements to be had with this setting. \code{"skd"} uses the smooth knn distances method as used by UMAP. - \item{\code{symmetrize}} the type of symmetrization, used by symmetric + \item{\code{symmetrize}} the type of symmetrization, used by symmetric methods only. Can be one of: - \code{"symmetric"} symmetric nearest neighbor style, by arithmetic + \code{"symmetric"} symmetric nearest neighbor style, by arithmetic averaging, as in t-SNE. \code{"fuzzy"} symmetrization by fuzzy set union as used in UMAP. \code{"mutual"} mutual nearest neighbor style as suggested by Schubert @@ -394,10 +399,10 @@ Some parameters are available for all (nor nearly all) methods. \item \code{"LargeVis"} \itemize{ \item{\code{gamma}} Weighting term for the repulsive versus attractive - forces. Default is \code{1}. The implementation by the creators of - LargeVis uses a default \code{gamma = 7}, but note that this is for - stochastic gradient descent with limited sampling of the repulsive - contributions so it's unlikely to be a good choice with the + forces. Default is \code{1}. 
The implementation by the creators of + LargeVis uses a default \code{gamma = 7}, but note that this is for + stochastic gradient descent with limited sampling of the repulsive + contributions so it's unlikely to be a good choice with the implementation used in this package. \item{\code{gr_eps}} Epsilon used in the gradient to prevent division by zero. Default is \code{0.1}. @@ -456,9 +461,9 @@ Some parameters are available for all (nor nearly all) methods. } \item \code{"ABSNE"} \itemize{ - \item{\code{alpha}} Alpha value for the alpha-beta divergence. Set + \item{\code{alpha}} Alpha value for the alpha-beta divergence. Set \code{alpha < 1} to produce more smaller, finer-grained clusters, and - \code{alpha > 1} to produce fewer, larger clusters, with more emphasis + \code{alpha > 1} to produce fewer, larger clusters, with more emphasis on global structure. Default is \code{1.0}, to give t-SNE-like behavior. \item{\code{lambda}} Sum of alpha + beta, where beta is the beta value for the alpha-beta divergence. Set \code{lambda < 1} to increase cluster @@ -467,8 +472,8 @@ Some parameters are available for all (nor nearly all) methods. } \item \code{"gsne"} \itemize{ - \item{\code{lambda}} Weighting factor to put increasing emphasis on - preserving global similarities. Set to \code{0} to get t-SNE (no + \item{\code{lambda}} Weighting factor to put increasing emphasis on + preserving global similarities. Set to \code{0} to get t-SNE (no extra emphasis on global structure), and to \code{1.0} to get equal weighting between the local and global divergences. Default is \code{1.0}. } @@ -488,7 +493,7 @@ to one of the following: \item{A matrix}: which must have dimensions \code{n} by \code{k}, where \code{n} is the number of rows in \code{X}. \item{\code{"rand"}}: initialize from a Gaussian distribution with mean 0 - and standard deviation 1e-4, the default used by t-SNE. The standard + and standard deviation 1e-4, the default used by t-SNE. The standard deviation can be controlled with \code{Y_init_sdev} (see below). \item{\code{"pca"}}: use the first \code{k} scores of the PCA: columns are centered, but no scaling beyond that which is applied by @@ -527,8 +532,8 @@ Steinerberger, 2017): it may therefore unnecessary to use the The \code{Y_init_sdev} parameter, if provided, will scale the input coordinates such that the standard deviation of each dimension is the -provided value. The default is to do no scaling, except for -\code{Y_init = "spca"} and \code{Y_init = "rand"} where a scaling to a +provided value. The default is to do no scaling, except for +\code{Y_init = "spca"} and \code{Y_init = "rand"} where a scaling to a standard deviation of \code{1e-4} is used, as in t-SNE initialization. \code{Y_init = "spca"} is effectively an alias for \code{Y_init = "pca", Y_init_sdev = 1e-4}. @@ -585,19 +590,19 @@ increasing order. For more details on IDP, see Another technique to combine multiple perplexities is to use the multiscale -approach given by de Bodt and co-workers (2018). As with IDP, a series of +approach given by de Bodt and co-workers (2018). As with IDP, a series of candidate perplexities are used, but all the affinity matrices are used to create an average matrix which is used as the final probability matrix. -To use this method, set \code{perplexity = "multiscale"}. Default and custom -list of perplexities to use can be provided in the same way as with IDP. +To use this method, set \code{perplexity = "multiscale"}. 
Default and custom +list of perplexities to use can be provided in the same way as with IDP. Note that previous work by this group described a slightly more complex approach where the number of individual perplexity results are introduced into the average sequentially over the course of the optimization, and -the output probabilities are also generated by an averaging. Although also +the output probabilities are also generated by an averaging. Although also referred to as "multiscale", these variations are not implemented. Also, -if \code{ret_extra = TRUE} is used, extra data associated with a specific -perplexity (e.g. degree centrality, intrinsic dimensionality) will not +if \code{ret_extra = TRUE} is used, extra data associated with a specific +perplexity (e.g. degree centrality, intrinsic dimensionality) will not be returned. } @@ -816,14 +821,14 @@ In \emph{Advances in neural information processing systems} (pp. 585-591). \url{http://papers.nips.cc/paper/1961-laplacian-eigenmaps-and-spectral-techniques-for-embedding-and-clustering.pdf} -Belkina, A. C., Ciccolella, C. O., Anno, R., Spidlen, J., Halpert, R., & Snyder-Cappione, J. (2018). -Automated optimal parameters for T-distributed stochastic neighbor embedding improve visualization and allow analysis of large datasets. +Belkina, A. C., Ciccolella, C. O., Anno, R., Spidlen, J., Halpert, R., & Snyder-Cappione, J. (2018). +Automated optimal parameters for T-distributed stochastic neighbor embedding improve visualization and allow analysis of large datasets. \emph{bioRxiv}, 451690. \url{https://www.biorxiv.org/content/10.1101/451690v2.abstract} -De Bodt, C., Mulders, D., Verleysen, M., & Lee, J. A. (2018). -Perplexity-free t-SNE and twice Student tt-SNE. -In \emph{European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2018)} (pp. 123-128). +De Bodt, C., Mulders, D., Verleysen, M., & Lee, J. A. (2018). +Perplexity-free t-SNE and twice Student tt-SNE. +In \emph{European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2018)} (pp. 123-128). \url{http://hdl.handle.net/2078.1/200844} Borg, I., & Groenen, P. J. (2005). @@ -842,13 +847,13 @@ Hinton, G. E., & Roweis, S. T. (2002). Stochastic neighbor embedding. In \emph{Advances in neural information processing systems} (pp. 833-840). -Im, D. J., Verma, N., & Branson, K. (2018). -Stochastic Neighbor Embedding under f-divergences. +Im, D. J., Verma, N., & Branson, K. (2018). +Stochastic Neighbor Embedding under f-divergences. \emph{arXiv preprint} \emph{arXiv}:1811.01247. \url{https://arxiv.org/abs/1811.01247} Kobak, D., & Berens, P. (2018). -The art of using t-SNE for single-cell transcriptomics. +The art of using t-SNE for single-cell transcriptomics. \emph{bioRxiv}, 453449. \url{https://doi.org/10.1101/453449} @@ -872,13 +877,13 @@ UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction \emph{arXiv preprint} \emph{arXiv}:1802.03426. \url{https://arxiv.org/abs/1802.03426} -Narayan, K. S., Punjani, A., & Abbeel, P. (2015, June). -Alpha-Beta Divergences Discover Micro and Macro Structures in Data. +Narayan, K. S., Punjani, A., & Abbeel, P. (2015, June). +Alpha-Beta Divergences Discover Micro and Macro Structures in Data. In \emph{Proceedings of the 32nd International Conference on Machine Learning (ICML-14)} (pp 796-804). \url{http://proceedings.mlr.press/v37/narayan15.html} -Schubert, E., & Gertz, M. (2017, October). +Schubert, E., & Gertz, M. (2017, October). 
Intrinsic t-stochastic neighbor embedding for visualization and outlier detection. In \emph{International Conference on Similarity Search and Applications} (pp. 188-203). Springer, Cham. @@ -901,6 +906,11 @@ Visualizing data using t-SNE. \emph{Journal of Machine Learning Research}, \emph{9} (2579-2605). \url{http://www.jmlr.org/papers/v9/vandermaaten08a.html} +Van der Maaten, L. (2014). +Accelerating t-SNE using tree-based algorithms. +\emph{Journal of Machine Learning Research}, \emph{15}(1) (3221-3245). +\url{https://jmlr.org/papers/v15/vandermaaten14a.html} + Venna, J., Peltonen, J., Nybo, K., Aidos, H., & Kaski, S. (2010). Information retrieval perspective to nonlinear dimensionality reduction for data visualization. @@ -926,8 +936,8 @@ Majorization-Minimization for Manifold Embedding. In \emph{Proceedings of the 18th International Conference on Artificial Intelligence and Statistics (AISTATS 2015)} (pp. 1088-1097). -Zhou, Y., & Sharpee, T. (2018). -Using global t-SNE to preserve inter-cluster data structure. -\emph{bioRxiv}, 331611. +Zhou, Y., & Sharpee, T. (2018). +Using global t-SNE to preserve inter-cluster data structure. +\emph{bioRxiv}, 331611. \url{https://doi.org/10.1101/331611} } diff --git a/smallvis/man/smallvis_perpstep.Rd b/smallvis/man/smallvis_perpstep.Rd index 5015b24..ebafcce 100644 --- a/smallvis/man/smallvis_perpstep.Rd +++ b/smallvis/man/smallvis_perpstep.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/smallvis.R +% Please edit documentation in R/iterated.R \name{smallvis_perpstep} \alias{smallvis_perpstep} \title{Dimensionality Reduction With Perplexity Stepping} @@ -60,7 +60,8 @@ Any value of \code{tol}, \code{exaggeration_factor} and # and then 750 iterations at perplexity = 40. iris_lbfgs_pstep <- smallvis_perpscale( step_iter = 250, X = iris, scale = FALSE, verbose = TRUE, Y_init = "spca", - ret_extra = c("DX", "DY"), perplexity = 40, max_iter = 1000, opt = list("l-bfgs")) + ret_extra = c("DX", "DY"), perplexity = 40, max_iter = 1000, opt = list("l-bfgs") +) } } \references{ diff --git a/smallvis/man/smallvis_rep.Rd b/smallvis/man/smallvis_rep.Rd index 81958bc..3245943 100644 --- a/smallvis/man/smallvis_rep.Rd +++ b/smallvis/man/smallvis_rep.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/smallvis.R +% Please edit documentation in R/iterated.R \name{smallvis_rep} \alias{smallvis_rep} \title{Best t-SNE Result From Multiple Initializations} @@ -37,8 +37,10 @@ This function ignores any value of \code{Y_init} you set, and uses \examples{ \dontrun{ # Return best result out of five random initializations -tsne_iris_best <- smallvis_rep(nrep = 5, X = iris, perplexity = 50, method = "tsne", - ret_extra = TRUE) +tsne_iris_best <- smallvis_rep( + nrep = 5, X = iris, perplexity = 50, method = "tsne", + ret_extra = TRUE +) # How much do the costs vary between runs? range(tsne_iris_best$all_costs) # Display best embedding found @@ -46,10 +48,11 @@ plot(tsne_iris_best$Y) # Keep all results # First result is in tsne_iris_rep[[1]], second in tsne_iris_rep[[2]] etc. 
-tsne_iris_rep <- smallvis_rep(nrep = 5, X = iris, perplexity = 50, method = "tsne", - ret_extra = TRUE, keep_all = TRUE) +tsne_iris_rep <- smallvis_rep( + nrep = 5, X = iris, perplexity = 50, method = "tsne", + ret_extra = TRUE, keep_all = TRUE +) # Index of result with smallest error is in special list item 'best_rep' best_iris <- tsne_iris_rep[[tsne_iris_rep[[1]]$best_rep]] - } } diff --git a/smallvis/src/RcppExports.cpp b/smallvis/src/RcppExports.cpp index ef11c3b..9b849df 100644 --- a/smallvis/src/RcppExports.cpp +++ b/smallvis/src/RcppExports.cpp @@ -10,6 +10,40 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif +// bh_tsne_gradient_cpp +NumericMatrix bh_tsne_gradient_cpp(IntegerVector indices, IntegerVector indptr, NumericVector P_data, NumericMatrix embedding, double theta, double eps, int n_threads); +RcppExport SEXP _smallvis_bh_tsne_gradient_cpp(SEXP indicesSEXP, SEXP indptrSEXP, SEXP P_dataSEXP, SEXP embeddingSEXP, SEXP thetaSEXP, SEXP epsSEXP, SEXP n_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< IntegerVector >::type indices(indicesSEXP); + Rcpp::traits::input_parameter< IntegerVector >::type indptr(indptrSEXP); + Rcpp::traits::input_parameter< NumericVector >::type P_data(P_dataSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type embedding(embeddingSEXP); + Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< double >::type eps(epsSEXP); + Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(bh_tsne_gradient_cpp(indices, indptr, P_data, embedding, theta, eps, n_threads)); + return rcpp_result_gen; +END_RCPP +} +// bh_plogq_cpp +double bh_plogq_cpp(IntegerVector indices, IntegerVector indptr, NumericVector P_data, NumericMatrix embedding, double theta, double eps, int n_threads); +RcppExport SEXP _smallvis_bh_plogq_cpp(SEXP indicesSEXP, SEXP indptrSEXP, SEXP P_dataSEXP, SEXP embeddingSEXP, SEXP thetaSEXP, SEXP epsSEXP, SEXP n_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< IntegerVector >::type indices(indicesSEXP); + Rcpp::traits::input_parameter< IntegerVector >::type indptr(indptrSEXP); + Rcpp::traits::input_parameter< NumericVector >::type P_data(P_dataSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type embedding(embeddingSEXP); + Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< double >::type eps(epsSEXP); + Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(bh_plogq_cpp(indices, indptr, P_data, embedding, theta, eps, n_threads)); + return rcpp_result_gen; +END_RCPP +} // dist2_cpp NumericMatrix dist2_cpp(NumericMatrix input, std::size_t n_threads); RcppExport SEXP _smallvis_dist2_cpp(SEXP inputSEXP, SEXP n_threadsSEXP) { @@ -89,8 +123,8 @@ BEGIN_RCPP END_RCPP } // find_beta_knn_cpp -List find_beta_knn_cpp(const NumericMatrix& knn_distances, const IntegerMatrix& knn_indices, double perplexity, double tol, int max_tries, std::size_t n_threads); -RcppExport SEXP _smallvis_find_beta_knn_cpp(SEXP knn_distancesSEXP, SEXP knn_indicesSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP n_threadsSEXP) { +List find_beta_knn_cpp(const NumericMatrix& knn_distances, const IntegerMatrix& knn_indices, double perplexity, double tol, int 
max_tries, bool ret_sparse, std::size_t n_threads);
+RcppExport SEXP _smallvis_find_beta_knn_cpp(SEXP knn_distancesSEXP, SEXP knn_indicesSEXP, SEXP perplexitySEXP, SEXP tolSEXP, SEXP max_triesSEXP, SEXP ret_sparseSEXP, SEXP n_threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -99,8 +133,9 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< double >::type perplexity(perplexitySEXP);
     Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
     Rcpp::traits::input_parameter< int >::type max_tries(max_triesSEXP);
+    Rcpp::traits::input_parameter< bool >::type ret_sparse(ret_sparseSEXP);
     Rcpp::traits::input_parameter< std::size_t >::type n_threads(n_threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(find_beta_knn_cpp(knn_distances, knn_indices, perplexity, tol, max_tries, n_threads));
+    rcpp_result_gen = Rcpp::wrap(find_beta_knn_cpp(knn_distances, knn_indices, perplexity, tol, max_tries, ret_sparse, n_threads));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -121,13 +156,15 @@ END_RCPP
 }

 static const R_CallMethodDef CallEntries[] = {
+    {"_smallvis_bh_tsne_gradient_cpp", (DL_FUNC) &_smallvis_bh_tsne_gradient_cpp, 7},
+    {"_smallvis_bh_plogq_cpp", (DL_FUNC) &_smallvis_bh_plogq_cpp, 7},
     {"_smallvis_dist2_cpp", (DL_FUNC) &_smallvis_dist2_cpp, 2},
     {"_smallvis_dist_cpp", (DL_FUNC) &_smallvis_dist_cpp, 2},
     {"_smallvis_tweight_cpp", (DL_FUNC) &_smallvis_tweight_cpp, 2},
     {"_smallvis_d2_to_tweight_cpp", (DL_FUNC) &_smallvis_d2_to_tweight_cpp, 2},
     {"_smallvis_tsne_grad_cpp", (DL_FUNC) &_smallvis_tsne_grad_cpp, 5},
     {"_smallvis_mmds_grad_cpp", (DL_FUNC) &_smallvis_mmds_grad_cpp, 5},
-    {"_smallvis_find_beta_knn_cpp", (DL_FUNC) &_smallvis_find_beta_knn_cpp, 6},
+    {"_smallvis_find_beta_knn_cpp", (DL_FUNC) &_smallvis_find_beta_knn_cpp, 7},
     {"_smallvis_find_beta_cpp", (DL_FUNC) &_smallvis_find_beta_cpp, 5},
     {NULL, NULL, 0}
 };
diff --git a/smallvis/src/bh.cpp b/smallvis/src/bh.cpp
new file mode 100644
index 0000000..a246ecc
--- /dev/null
+++ b/smallvis/src/bh.cpp
@@ -0,0 +1,40 @@
+#include "bh.h"
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// [[Rcpp::export]]
+NumericMatrix bh_tsne_gradient_cpp(IntegerVector indices, IntegerVector indptr,
+                                   NumericVector P_data,
+                                   NumericMatrix embedding, double theta = 0.5,
+                                   double eps = 1e-16, int n_threads = 1) {
+  std::vector<std::size_t> indices_cpp = as<std::vector<std::size_t>>(indices);
+  std::vector<std::size_t> indptr_cpp = as<std::vector<std::size_t>>(indptr);
+  std::vector<double> P_data_cpp = as<std::vector<double>>(P_data);
+
+  std::vector<double> embedding_vec =
+      as<std::vector<double>>(transpose(embedding));
+
+  std::vector<double> gradient_cpp =
+      smallvis::bh_tsne_gradient(indices_cpp, indptr_cpp, P_data_cpp,
+                                 embedding_vec, theta, eps, n_threads);
+
+  std::size_t n_samples = embedding.nrow();
+  NumericMatrix gradient(2, n_samples, gradient_cpp.begin());
+  return transpose(gradient);
+}
+
+// [[Rcpp::export]]
+double bh_plogq_cpp(IntegerVector indices, IntegerVector indptr,
+                    NumericVector P_data, NumericMatrix embedding,
+                    double theta = 0.5, double eps = 1e-16, int n_threads = 1) {
+  std::vector<std::size_t> indices_cpp = as<std::vector<std::size_t>>(indices);
+  std::vector<std::size_t> indptr_cpp = as<std::vector<std::size_t>>(indptr);
+  std::vector<double> P_data_cpp = as<std::vector<double>>(P_data);
+
+  std::vector<double> embedding_vec =
+      as<std::vector<double>>(transpose(embedding));
+
+  return smallvis::bh_plogq(indices_cpp, indptr_cpp, P_data_cpp, embedding_vec,
+                            theta, eps, n_threads);
+}
diff --git a/smallvis/src/bh.h b/smallvis/src/bh.h
new file mode 100644
index 0000000..6d7519f
--- /dev/null
+++ b/smallvis/src/bh.h
@@ -0,0 +1,369 @@
+// BSD 3-Clause License
+//
+// Copyright (c) 2024, James Melville
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice, this
+//   list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * Neither the name of the copyright holder nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+// This is a rough translation of the Barnes-Hut t-SNE Cython code from the
+// OpenTSNE package originally written by Pavlin Poličar. The original code can
+// be found at https://github.com/pavlin-policar/openTSNE
+
+#ifndef SMALLVIS_BH_H
+#define SMALLVIS_BH_H
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "threads.h"
+
+namespace smallvis {
+
+class QuadTree {
+public:
+  // Node structure
+  struct Node {
+    // Center of mass of points within this node
+    double center_of_mass_x = 0.0;
+    double center_of_mass_y = 0.0;
+    std::size_t num_points = 0;
+    Node *children[4] = {nullptr, nullptr, nullptr, nullptr};
+
+    Node(double center_x, double center_y, double length) noexcept
+        : center_x(center_x), center_y(center_y), length(length),
+          length2(length * length) {}
+
+    ~Node() noexcept {
+      for (Node *child : children) {
+        delete child;
+      }
+    }
+
+    bool is_close(double point_x, double point_y, double eps) const noexcept {
+      return std::abs(center_of_mass_x - point_x) < eps &&
+             std::abs(center_of_mass_y - point_y) < eps;
+    }
+
+    bool is_skippable(double point_x, double point_y,
+                      double eps) const noexcept {
+      return num_points == 0 || (is_leaf && is_close(point_x, point_y, eps));
+    }
+
+    // Barnes-Hut criterion rearranged for squared distances and squared theta
+    // to avoid square roots and divisions
+    bool is_summary(double d2, double theta2) const noexcept {
+      return is_leaf || length2 < d2 * theta2;
+    }
+
+    void add_point(double point_x, double point_y, double eps) {
+      if ((is_leaf && num_points == 0) || is_close(point_x, point_y, eps)) {
+        update_center_of_mass(point_x, point_y);
+        return;
+      }
+
+      if (is_leaf) {
+        split();
+        std::size_t qid = find_quadrant(center_of_mass_x, center_of_mass_y);
+        children[qid]->add_point(center_of_mass_x, center_of_mass_y, eps);
+      }
+
+      update_center_of_mass(point_x, point_y);
+      std::size_t qid = find_quadrant(point_x, point_y);
+      children[qid]->add_point(point_x, point_y, eps);
+    }
+
+  private:
+    // Center of the node
+    double center_x;
+    double center_y;
+    // (Longest) side length of the node
+    double length;
+    // length squared used in Barnes-Hut criterion
+    double length2;
+    bool is_leaf = true;
+
+    std::size_t find_quadrant(double point_x, double point_y) const noexcept {
+      // Determine which quadrant the point is in -- 0: SW, 1: SE, 2: NW, 3: NE
+      if (point_x <= center_x) {
+        return point_y <= center_y ? 0 : 2;
+      }
+      return point_y <= center_y ? 1 : 3;
+    }
+
+    void update_center_of_mass(double point_x, double point_y) noexcept {
+      center_of_mass_x =
+          (center_of_mass_x * num_points + point_x) / (num_points + 1);
+      center_of_mass_y =
+          (center_of_mass_y * num_points + point_y) / (num_points + 1);
+      num_points += 1;
+    }
+
+    void split() {
+      is_leaf = false;
+      double new_len = length * 0.5;
+      double half_len = new_len * 0.5;
+
+      children[0] = new Node(center_x - half_len, center_y - half_len, new_len);
+      children[1] = new Node(center_x + half_len, center_y - half_len, new_len);
+      children[2] = new Node(center_x - half_len, center_y + half_len, new_len);
+      children[3] = new Node(center_x + half_len, center_y + half_len, new_len);
+    }
+  };
+  std::unique_ptr<Node> root;
+
+  QuadTree(const std::vector<double> &data, double eps = EPS) {
+    double coords_min_x = std::numeric_limits<double>::max();
+    double coords_max_x = std::numeric_limits<double>::lowest();
+    double coords_min_y = std::numeric_limits<double>::max();
+    double coords_max_y = std::numeric_limits<double>::lowest();
+
+    const std::size_t n_points = data.size() / 2;
+    for (std::size_t i = 0; i < n_points * 2; i += 2) {
+      double x = data[i];
+      double y = data[i + 1];
+
+      coords_min_x = std::min(coords_min_x, x);
+      coords_max_x = std::max(coords_max_x, x);
+      coords_min_y = std::min(coords_min_y, y);
+      coords_max_y = std::max(coords_max_y, y);
+    }
+
+    double center_x = (coords_max_x + coords_min_x) * 0.5;
+    double center_y = (coords_max_y + coords_min_y) * 0.5;
+    double length =
+        std::max(coords_max_x - coords_min_x, coords_max_y - coords_min_y);
+
+    root = std::make_unique<Node>(center_x, center_y, length);
+    for (std::size_t i = 0; i < n_points * 2; i += 2) {
+      root->add_point(data[i], data[i + 1], eps);
+    }
+  }
+
+private:
+  constexpr static double EPS = std::numeric_limits<double>::epsilon();
+};
+
+void _bh_tsne_negative_gradient_single(const QuadTree::Node &node,
+                                       double point_x, double point_y,
+                                       double theta2, double eps,
+                                       double &gradient_x, double &gradient_y,
+                                       double &Zi) {
+  // Ensure we do not process empty nodes or simple self-interactions
+  if (node.is_skippable(point_x, point_y, eps)) {
+    return;
+  }
+
+  // Compute the squared Euclidean distance between the point and the center of
+  // mass
+  double diff_x = node.center_of_mass_x - point_x;
+  double diff_y = node.center_of_mass_y - point_y;
+  double d2_ij = eps + diff_x * diff_x + diff_y * diff_y;
+
+  // Check if we can use this node as a summary
+  if (node.is_summary(d2_ij, theta2)) {
+    double w_ij = 1.0 / (1.0 + d2_ij);
+
+    Zi += node.num_points * w_ij;
+
+    // un-normalized -ve grad W*W*dY
+    w_ij *= w_ij;
+    gradient_x += node.num_points * w_ij * diff_x;
+    gradient_y += node.num_points * w_ij * diff_y;
+
+    return;
+  }
+
+  // Recursively apply Barnes-Hut to the children
+  for (const auto &child : node.children) {
+    _bh_tsne_negative_gradient_single(*child, point_x, point_y, theta2, eps,
+                                      gradient_x, gradient_y, Zi);
+  }
+}
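+
+// To see why is_summary above encodes the usual Barnes-Hut criterion: the
+// textbook acceptance test for summarizing a cell as a single point is
+//
+//   s / d < theta
+//
+// where s is the cell side length and d is the distance from the query point
+// to the cell's center of mass. Squaring both sides and rearranging gives
+//
+//   s^2 < d^2 * theta^2
+//
+// which is the `length2 < d2 * theta2` comparison, with no square root or
+// division needed. An accepted cell of N points then contributes N * w to Z
+// and N * w^2 * (y_i - y_cell) to the un-normalized repulsive gradient, with
+// w = 1 / (1 + d^2), as computed in _bh_tsne_negative_gradient_single above.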
+
+// Function to estimate the negative gradient using the Barnes-Hut
+// approximation
+void bh_tsne_negative_gradient(const QuadTree &tree,
+                               const std::vector<double> &embedding,
+                               double theta2, double eps,
+                               std::size_t n_threads,
+                               std::vector<double> &gradient) {
+  std::size_t num_points = embedding.size() / 2;
+  std::vector<double> Zi(std::max(static_cast<std::size_t>(1), n_threads), 0.0);
+
+  const QuadTree::Node *root = tree.root.get();
+
+  // Function to calculate the negative gradient for a single point
+  auto worker = [&](std::size_t start, std::size_t end, std::size_t thread_id) {
+    for (std::size_t i = start * 2; i < end * 2; i += 2) {
+      _bh_tsne_negative_gradient_single(*root, embedding[i], embedding[i + 1],
+                                        theta2, eps, gradient[i],
+                                        gradient[i + 1], Zi[thread_id]);
+    }
+  };
+  parallel_for(num_points, n_threads, worker);
+
+  double Z = std::accumulate(Zi.begin(), Zi.end(), eps);
+
+  // Normalize the gradient
+  for (std::size_t i = 0; i < num_points * 2; i += 2) {
+    gradient[i] /= Z;
+    gradient[i + 1] /= Z;
+  }
+}
+
+void bh_tsne_positive_gradient(const std::vector<std::size_t> &indices,
+                               const std::vector<std::size_t> &indptr,
+                               const std::vector<double> &P_data,
+                               const std::vector<double> &embedding,
+                               std::size_t n_threads,
+                               std::vector<double> &gradient) {
+
+  auto worker = [&](std::size_t start, std::size_t end) {
+    for (std::size_t i = start; i < end; ++i) {
+      const std::size_t i2 = i * 2;
+      for (std::size_t k = indptr[i]; k < indptr[i + 1]; ++k) {
+        std::size_t j2 = indices[k] * 2;
+
+        // Compute the direction of the points' attraction and the squared
+        // Euclidean distance
+        double diff_x = embedding[i2] - embedding[j2];
+        double diff_y = embedding[i2 + 1] - embedding[j2 + 1];
+        double w_ij_p_ij =
+            P_data[k] / (1.0 + diff_x * diff_x + diff_y * diff_y);
+
+        // Compute F_{attr} of point `j` on point `i`
+        // W x P x dY
+        gradient[i2] += w_ij_p_ij * diff_x;
+        gradient[i2 + 1] += w_ij_p_ij * diff_y;
+      }
+    }
+  };
+  parallel_for(gradient.size() / 2, n_threads, worker);
+}
+
+void _bh_Zi(const QuadTree::Node &node, double point_x, double point_y,
+            double theta2, double eps, double &Zi) {
+  // Ensure we do not process empty nodes or simple self-interactions
+  if (node.is_skippable(point_x, point_y, eps)) {
+    return;
+  }
+
+  // Compute the squared Euclidean distance between the point and the center of
+  // mass
+  double diff_x = node.center_of_mass_x - point_x;
+  double diff_y = node.center_of_mass_y - point_y;
+  double d2_ij = eps + diff_x * diff_x + diff_y * diff_y;
+
+  // Check if we can use this node as a summary
+  if (node.is_summary(d2_ij, theta2)) {
+    Zi += node.num_points / (1.0 + d2_ij);
+    return;
+  }
+
+  // Recursively apply Barnes-Hut to the children
+  for (const auto &child : node.children) {
+    _bh_Zi(*child, point_x, point_y, theta2, eps, Zi);
+  }
+}
+
+double bh_Z(const QuadTree &tree, const std::vector<double> &embedding,
+            double theta2, double eps, std::size_t n_threads) {
+  std::size_t num_points = embedding.size() / 2;
+  std::vector<double> Zi(std::max(static_cast<std::size_t>(1), n_threads), 0.0);
+
+  const QuadTree::Node *root = tree.root.get();
+
+  // Function to accumulate the per-thread partial sums of Z
+  auto worker = [&](std::size_t start, std::size_t end, std::size_t thread_id) {
+    for (std::size_t i = start * 2; i < end * 2; i += 2) {
+      _bh_Zi(*root, embedding[i], embedding[i + 1], theta2, eps, Zi[thread_id]);
+    }
+  };
+  parallel_for(num_points, n_threads, worker);
+
+  return std::accumulate(Zi.begin(), Zi.end(), eps);
+}
+
+double bh_plogq(const std::vector<std::size_t> &indices,
+                const std::vector<std::size_t> &indptr,
+                const std::vector<double> &P_data,
+                const std::vector<double> &embedding, double theta, double eps,
+                std::size_t n_threads) {
+  std::size_t n_samples = embedding.size() / 2;
+
+  QuadTree tree(embedding, eps);
+  double mlogZ =
+      -std::log(bh_Z(tree, embedding, theta * theta, eps, n_threads));
+
+  std::vector<double> partial_plogq(
+      std::max(static_cast<std::size_t>(1), n_threads));
+  auto worker = [&](std::size_t start, std::size_t end, std::size_t thread_id) {
+    double window_plogq = 0.0;
+    for (std::size_t i = start, i2 = start * 2; i < end; ++i, i2 += 2) {
+      for (std::size_t k = indptr[i]; k < indptr[i + 1]; ++k) {
+        std::size_t j2 = indices[k] * 2;
+
+        // p * log(q) = p * log(w/Z) = p * (log w - log Z) =
+        // p * (log(1/(1+d2)) - log Z) = p * (-log(1+d2) - log Z)
+        double diff_x = embedding[i2] - embedding[j2];
+        double diff_y = embedding[i2 + 1] - embedding[j2 + 1];
+
+        window_plogq +=
+            P_data[k] *
+            (mlogZ - std::log(1.0 + diff_x * diff_x + diff_y * diff_y));
+      }
+    }
+    partial_plogq[thread_id] += window_plogq;
+  };
+  parallel_for(n_samples, n_threads, worker);
+
+  return std::accumulate(partial_plogq.begin(), partial_plogq.end(), 0.0);
+}
+
+std::vector<double> bh_tsne_gradient(const std::vector<std::size_t> &indices,
+                                     const std::vector<std::size_t> &indptr,
+                                     const std::vector<double> &P_data,
+                                     const std::vector<double> &embedding,
+                                     double theta, double eps,
+                                     std::size_t n_threads) {
+
+  QuadTree tree(embedding, eps);
+  std::vector<double> gradient(embedding.size(), 0.0);
+
+  // don't re-arrange the order of the following two function calls! The
+  // negative pass divides the accumulated values by Z, so the positive
+  // contributions must be added afterwards.
+  bh_tsne_negative_gradient(tree, embedding, theta * theta, eps, n_threads,
+                            gradient);
+  bh_tsne_positive_gradient(indices, indptr, P_data, embedding, n_threads,
+                            gradient);
+
+  return gradient;
+}
+} // namespace smallvis
+
+#endif // SMALLVIS_BH_H
diff --git a/smallvis/src/gradients.cpp b/smallvis/src/gradients.cpp
index b0e4062..69ef34c 100644
--- a/smallvis/src/gradients.cpp
+++ b/smallvis/src/gradients.cpp
@@ -10,7 +10,7 @@ void mmds_grad(const std::vector<double> &R, const std::vector<double> &D,
                const std::vector<double> &Y, double eps,
                std::vector<double> &gradient, std::size_t start,
                std::size_t end, std::size_t n, std::size_t d) {
-  
+
   for (std::size_t i = start; i < end; ++i) {
     const std::size_t i_d = i * d;
     const std::size_t i_n = i * n;
@@ -27,14 +27,13 @@ void mmds_grad(const std::vector<double> &R, const std::vector<double> &D,
   }
 }

-
 void tsne_grad(const std::vector<double> &P, const std::vector<double> &W,
                double Z, const std::vector<double> &Y,
                std::vector<double> &gradient, std::size_t start,
                std::size_t end, std::size_t n) {
   const double Z4 = 4.0 / Z;
-  
+
   for (std::size_t i = start; i < end; ++i) {
     const std::size_t i_d = i + i;
     const std::size_t i_n = i * n;
@@ -256,10 +255,9 @@ NumericMatrix d2_to_tweight_cpp(NumericMatrix dist_matrix, int n_threads) {
 }

 // [[Rcpp::export]]
-NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
-                            const NumericMatrix &W, double Z,
-                            const NumericMatrix &Y,
-                            std::size_t n_threads) {
+NumericMatrix tsne_grad_cpp(const NumericMatrix &P, const NumericMatrix &W,
+                            double Z, const NumericMatrix &Y,
+                            std::size_t n_threads) {
   std::size_t n = Y.nrow();

   std::vector<double> P_vec(P.begin(), P.end());
@@ -300,35 +298,33 @@ NumericMatrix tsne_grad_cpp(const NumericMatrix &P,
 }

 // [[Rcpp::export]]
-NumericMatrix mmds_grad_cpp(const NumericMatrix &R,
-                            const NumericMatrix &D,
-                            const NumericMatrix &Y,
-                            double eps,
+NumericMatrix mmds_grad_cpp(const NumericMatrix &R, const NumericMatrix &D,
+                            const NumericMatrix &Y, double eps,
                             std::size_t n_threads) {
   std::size_t n = Y.nrow();
   std::size_t d = Y.ncol();
-  
+
   std::vector<double> R_vec(R.begin(), R.end());
   std::vector<double> D_vec(D.begin(), D.end());
-  
+
   std::vector<double> Y_vec(n * d);
   for (std::size_t i = 0; i < n; ++i) {
     for (std::size_t j = 0; j < d; ++j) {
       Y_vec[i * d + j] = Y(i, j);
     }
   }
-  
+
   std::vector<double> gradient_vec(n * d, 0.0);
-  
+
   if (n_threads > 1) {
     std::size_t chunk_size = (n + n_threads - 1) / n_threads;
     std::vector<std::thread> threads;
     for (std::size_t t = 0; t < n_threads; ++t) {
       std::size_t start_row = t * chunk_size;
       std::size_t end_row = std::min(start_row + chunk_size, n);
-      threads.emplace_back(mmds_grad, std::cref(R_vec), std::cref(D_vec),
-                           std::cref(Y_vec), eps, std::ref(gradient_vec), start_row,
-                           end_row, n, d);
+      threads.emplace_back(mmds_grad, std::cref(R_vec), std::cref(D_vec),
+                           std::cref(Y_vec), eps, std::ref(gradient_vec),
+                           start_row, end_row, n, d);
     }
     for (auto &thread : threads) {
       thread.join();
@@ -336,16 +332,13 @@ NumericMatrix mmds_grad_cpp(const NumericMatrix &R,
   } else {
     mmds_grad(R_vec, D_vec, Y_vec, eps, gradient_vec, 0, n, n, d);
   }
-  
+
   NumericMatrix gradient(n, d);
   for (std::size_t i = 0; i < n; ++i) {
     for (std::size_t j = 0; j < d; ++j) {
       gradient(i, j) = gradient_vec[i * d + j];
     }
   }
-  
+
   return gradient;
 }
-
-
-
diff --git a/smallvis/src/perplexity.cpp b/smallvis/src/perplexity.cpp
index af7c85d..0acdf66 100644
--- a/smallvis/src/perplexity.cpp
+++ b/smallvis/src/perplexity.cpp
@@ -195,7 +195,7 @@ void find_beta(const std::vector<double> &data, std::size_t n, std::size_t d,
 List find_beta_knn_cpp(const NumericMatrix &knn_distances,
                        const IntegerMatrix &knn_indices,
                        double perplexity = 15, double tol = 1e-5, int max_tries = 50,
-                       std::size_t n_threads = 1) {
+                       bool ret_sparse = false, std::size_t n_threads = 1) {

   const std::size_t n = knn_distances.nrow();
   const std::size_t k = knn_distances.ncol();
@@ -232,15 +232,21 @@ List find_beta_knn_cpp(const NumericMatrix &knn_distances,
                   beta, bad_perp, 0, n);
   }

-  NumericMatrix P(n, n);
-  for (std::size_t i = 0; i < n; ++i) {
-    for (std::size_t j = 0; j < k; ++j) {
-      P(i, knn_indices(i, j) - 1) = W[i * k + j];
+  if (ret_sparse) {
+    NumericVector P(n * k);
+    std::copy(W.begin(), W.end(), P.begin());
+    return List::create(Named("P") = P, Named("beta") = beta,
+                        Named("bad_perp") = bad_perp);
+  } else {
+    NumericMatrix P(n, n);
+    for (std::size_t i = 0; i < n; ++i) {
+      for (std::size_t j = 0; j < k; ++j) {
+        P(i, knn_indices(i, j) - 1) = W[i * k + j];
+      }
     }
+    return List::create(Named("P") = P, Named("beta") = beta,
+                        Named("bad_perp") = bad_perp);
   }
-
-  return List::create(Named("P") = P, Named("beta") = beta,
-                      Named("bad_perp") = bad_perp);
 }

 // [[Rcpp::export]]
diff --git a/smallvis/src/threads.h b/smallvis/src/threads.h
new file mode 100644
index 0000000..d7c9c6e
--- /dev/null
+++ b/smallvis/src/threads.h
@@ -0,0 +1,65 @@
+#ifndef SMALLVIS_THREADS_H
+#define SMALLVIS_THREADS_H
+
+#include <functional>
+#include <thread>
+
+// Trait to detect the arity of the worker (2- or 3- arg)
+template <typename T> struct function_traits;
+
+template <typename R, typename... Args>
+struct function_traits<std::function<R(Args...)>> {
+  static constexpr std::size_t arity = sizeof...(Args);
+};
+
+template <typename T> auto make_function(T &&t) {
+  return std::function{std::forward<T>(t)};
+}
+
+// Enable if the callable has 3 arguments (start, end, thread_id)
+template <typename WorkerFunc>
+std::enable_if_t<function_traits<decltype(make_function(
+    std::declval<WorkerFunc>()))>::arity == 3>
+parallel_for(std::size_t N, std::size_t n_threads, WorkerFunc worker) {
+  if (n_threads > 1) {
+    std::size_t chunk_size = N / n_threads;
+    std::vector<std::thread> threads;
+
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start = t * chunk_size;
+      std::size_t end = (t == n_threads - 1) ? N : (t + 1) * chunk_size;
+      threads.emplace_back(worker, start, end, t);
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+  } else {
+    worker(0, N, 0);
+  }
+}
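+
+// A hypothetical usage sketch of the 3-arg form (not part of this header):
+// each thread accumulates into its own slot of a per-thread buffer, indexed
+// by thread_id, so no locking is needed; `values` is a stand-in for any
+// std::vector<double>:
+//
+//   std::vector<double> partial(std::max<std::size_t>(1, n_threads), 0.0);
+//   parallel_for(values.size(), n_threads,
+//                [&](std::size_t start, std::size_t end, std::size_t tid) {
+//                  for (std::size_t i = start; i < end; ++i) {
+//                    partial[tid] += values[i];
+//                  }
+//                });
+//   double total = std::accumulate(partial.begin(), partial.end(), 0.0);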
+
+// Enable if the callable has 2 arguments (start, end)
+template <typename WorkerFunc>
+std::enable_if_t<function_traits<decltype(make_function(
+    std::declval<WorkerFunc>()))>::arity == 2>
+parallel_for(std::size_t N, std::size_t n_threads, WorkerFunc worker) {
+  if (n_threads > 1) {
+    std::size_t chunk_size = N / n_threads;
+    std::vector<std::thread> threads;
+
+    for (std::size_t t = 0; t < n_threads; ++t) {
+      std::size_t start = t * chunk_size;
+      std::size_t end = (t == n_threads - 1) ? N : (t + 1) * chunk_size;
+      threads.emplace_back(worker, start, end);
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+  } else {
+    worker(0, N);
+  }
+}
+
+#endif // SMALLVIS_THREADS_H
\ No newline at end of file
diff --git a/smallvis/tests/testthat/helper_api.R b/smallvis/tests/testthat/helper_api.R
index 6b679ad..d4ad980 100644
--- a/smallvis/tests/testthat/helper_api.R
+++ b/smallvis/tests/testthat/helper_api.R
@@ -1,7 +1,7 @@
-expect_api <- function(method, Y, cost, X = iris10, use_cpp = FALSE) {
+expect_api <- function(method, Y, cost, X = iris10, use_cpp = FALSE, perplexity = 5) {
   res <- smallvis(X, Y_init = iris10_Y, method = method, eta = 0.1,
-                  perplexity = 5, epoch_callback = NULL, verbose = FALSE,
+                  perplexity = perplexity, epoch_callback = NULL, verbose = FALSE,
                   ret_extra = TRUE, use_cpp = use_cpp)
   expect_equal(res$Y, c2y(Y), tolerance = 1e-3, info = paste0(method[[1]], " Y"))
   expect_equal(final_cost(res), cost, tolerance = 1e-4, info = paste0(method[[1]], " cost"))
diff --git a/smallvis/tests/testthat/test_api.R b/smallvis/tests/testthat/test_api.R
index 1ae7e7e..bc1d4cd 100644
--- a/smallvis/tests/testthat/test_api.R
+++ b/smallvis/tests/testthat/test_api.R
@@ -519,7 +519,17 @@ test_that("Miscellany", {
   expect_api(method = "tsne", Y = c(-4.617, 2.008, -0.8907, 4.044, 0.6146,
                                     -3.478, 0.4454, 1.967, 4.82, -4.913, 0.5231, 0.837, 0.02866, -1.871,
                                     -1.438, 1.273, 0.6842, -1.849, -0.6084, 2.42), X = ui10, use_cpp = TRUE,
-             cost = 0.02485) 
+             cost = 0.02485)
+  expect_api(method = list("bhtsne", perplexity = 3, inp_kernel = "perpnnks", theta = 0.0),
+             Y = c(-12.52, 4.749, -1.071, 10.54, 2.669, -10.44, 1.578, 5.019,
+                   13.21, -13.73, 3.153, 3.662, 1.081, -7.454, -3.565, 4.962,
+                   2.446, -5.197, -6.536, 7.447), X = ui10, use_cpp = TRUE, perplexity = 3,
+             cost = 0.05768)
+  expect_api(method = list("bhtsne", perplexity = 3, inp_kernel = "perpnnks", theta = 0.5),
+             Y = c(-11.78, 4.035, -1.215, 10.35, 2.76, -9.966, 1.177, 5.08,
+                   12.8, -13.23, 2.413, 3.66, 0.7804, -6.291, -3.386, 4.257,
+                   2.233, -4.78, -5.25, 6.362), X = ui10, use_cpp = TRUE, perplexity = 3,
+             cost = 0.05564)
 })


From 25fb184cf2311ea1118d117f7c21d7dec8924956 Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sun, 11 Aug 2024 21:24:59 -0700
Subject: [PATCH 24/26] Add license to threading functions

---
 smallvis/src/threads.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/smallvis/src/threads.h b/smallvis/src/threads.h
index d7c9c6e..af38696 100644
--- a/smallvis/src/threads.h
+++ b/smallvis/src/threads.h
@@ -1,9 +1,42 @@
+// BSD 3-Clause License
+//
+// Copyright (c) 2024, James Melville
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice, this
+//   list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * Neither the name of the copyright holder nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
 #ifndef SMALLVIS_THREADS_H
 #define SMALLVIS_THREADS_H
 
 #include <functional>
 #include <thread>
 
+namespace smallvis {
+
 // Trait to detect the arity of the worker (2- or 3- arg)
 template <typename T> struct function_traits;
 
@@ -61,5 +94,6 @@ parallel_for(std::size_t N, std::size_t n_threads, WorkerFunc worker) {
     worker(0, N);
   }
 }
+} // namespace smallvis
 
 #endif // SMALLVIS_THREADS_H
\ No newline at end of file

From aea582f9feb4eceb7a921566303992eb2b1dc2dd Mon Sep 17 00:00:00 2001
From: James Melville
Date: Sun, 11 Aug 2024 21:26:07 -0700
Subject: [PATCH 25/26] Mention initial Barnes-Hut and related code

---
 README.md | 80 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 887b60d..d18cd49 100644
--- a/README.md
+++ b/README.md
@@ -2,28 +2,56 @@
 
 An R package for small-scale dimensionality reduction using
 neighborhood-preservation
-dimensionality reduction methods, including [t-Distributed Stochastic Neighbor Embedding](https://lvdmaaten.github.io/tsne/),
-[LargeVis](https://arxiv.org/abs/1602.00370) and
-[UMAP](https://arxiv.org/abs/1802.03426).
-
-LargeVis and UMAP are of particular interest because they seem to give
-visualizations which are very competitive with t-SNE, but can use stochastic
-gradient descent to give faster run times and/or better scaling with dataset
-size than the typical Barnes-Hut t-SNE implementation.
-
-This package is designed to make it easier to experiment with and compare these
-methods, by removing differences in implementation details.
-
-One way it does this is by abandoning the more advanced nearest-neighbor
-methods, distance approximations, sampling, and multi-threaded stochastic
-gradient descent techniques. The price paid for this simplification is that the
-algorithms are back to being O(N^2) in storage and computation costs (and being
-in pure R). Unlike UMAP,
-[the official implementation of LargeVis](https://github.com/lferry007/LargeVis),
-and the
-[Barnes-Hut implementation of t-SNE](https://github.com/lvdmaaten/bhtsne),
-this package is therefore *not* suitable for large scale visualization.
-Hence the name smallvis.
+dimensionality reduction methods, including
+[t-Distributed Stochastic Neighbor Embedding](https://lvdmaaten.github.io/tsne/),
+and a non-stochastic version of the [LargeVis](https://arxiv.org/abs/1602.00370)
+and [UMAP](https://arxiv.org/abs/1802.03426) cost functions.
+
+The purpose of this package is to make it easier to experiment with different
+dimensionality reduction methods while having more control over things like
+input scaling, nearest neighbor calculations, initialization and optimization,
+all of which can make comparisons between different packages difficult. Be
+warned: most implementations are not optimized for speed and scale as O(N^2),
+but see below for my dream to upgrade `smallvis` to more of a mediumvis.
+
+*August 11 2024* **The Turbo Championship Edition Update**. I have briefly
+brought `smallvis` back from the dead to speed it up a bit. I have added:
+
+* Barnes-Hut t-SNE. This scales to larger datasets and makes it feasible to
+run on the full MNIST digits dataset (i.e. 70,000 items). It uses a (2D only)
+C++ translation of the Cython implementation in the Python
+[openTSNE](https://github.com/pavlin-policar/openTSNE) package originally
+authored by [Pavlin Poličar](https://github.com/pavlin-policar). It's BSD
+3-clause licensed (and can be found in `src/bh.h`). Use it with
+`method = "bhtsne"`. The degree of approximation can be controlled with
+`theta`. Be aware that it's not as fast as e.g.
+[Rtsne](https://github.com/jkrijthe/Rtsne), at least during the optimization
+step. That package uses Laurens van der Maaten's original C++ code, which I am
+not at all sure can be redistributed with R code due to its BSD 4-clause
+license. I would love to be wrong about that though! The Quad Tree
+implementation could be used with other embedding methods, but I haven't got
+round to implementing that yet.
+* A multi-threaded C++ perplexity search, using only the nearest neighbors of
+each point (3 times the perplexity), is used with BH t-SNE.
+* My own [rnndescent](https://cran.r-project.org/package=rnndescent) package
+replaces FNN for nearest neighbor search. Apart from being a monument to my
+ego, it can be faster than FNN for brute force search because it can be
+multi-threaded (use `n_threads` to control this). Also, approximate nearest
+neighbor search becomes quite important with larger datasets.
+* For exact search, I have started adding multi-threaded C++ code to calculate
+the gradient. Set `use_cpp = TRUE` to use this (see the example below). The
+speed-up is not as big as you might hope, because the R code uses some very
+optimized linear algebra for some steps, which will blow my puny C++ code out
+of the water. However, in many cases the linear algebra libraries won't be
+multi-threaded, so sheer brute-force threading can overcome this. Just don't
+expect `n` threads to give you `n` times the speed. Like Barnes-Hut, this
+requires the gradient for each method to be implemented in C++, and I haven't
+done that for all of them yet.
+* [irlba](https://cran.r-project.org/package=irlba) is now a dependency for
+doing PCA on larger datasets.
+
+I will probably at least attempt to apply Barnes-Hut and multi-threading to
+some other methods. On the other hand, it's taken me five years to get back to
+this, so don't hold your breath.
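+
+For exact methods, the new speed-related settings combine like this; a minimal
+sketch (the result names, dataset and parameter values are illustrative only,
+not tuned recommendations):
+
+```R
+library(smallvis)
+
+# Exact t-SNE, with the C++ gradient spread over 4 threads
+tsne_cpp <- smallvis(iris, perplexity = 25, use_cpp = TRUE, n_threads = 4)
+
+# Barnes-Hut t-SNE: theta trades accuracy for speed (theta = 0 is exact)
+bhtsne_cpp <- smallvis(iris, perplexity = 25, method = "bhtsne", theta = 0.5,
+                       n_threads = 4)
+```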
## Prerequisites @@ -61,6 +89,10 @@ library(smallvis) # Automatically plots the results during optimization tsne_iris <- smallvis(iris, perplexity = 25, verbose = TRUE) +# Barnes-Hut: +bhtsne_iris <- smallvis(iris, perplexity = 25, method = "bhtsne", theta = 0.8) + + # Using a custom epoch_callback uniq_spec <- unique(iris$Species) colors <- rainbow(length(uniq_spec)) @@ -205,7 +237,7 @@ scale. Also of relevance are: * [UMAP](https://github.com/lmcinnes/umap) (in Python) -* [UWOT](https://github.com/jlmelville/uwot) a package implementing LargeVis +* [uwot](https://github.com/jlmelville/uwot) a package implementing LargeVis and UMAP. * [LargeVis](https://github.com/lferry007/LargeVis) (in C++) * [Spectra](http://spectralib.org/), the C++ library that RSpectra wraps. @@ -221,5 +253,5 @@ of Justin Donaldson's R package for [t-SNE](https://cran.r-project.org/package=t [GPLv2 or later](https://www.gnu.org/licenses/gpl-2.0.txt). Any LargeVis-specific code (e.g. cost and gradient calculation) can also be considered [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0). -Similarly, UMAP-related code is also licensed as +Similarly, Barnes-Hut and UMAP-related code is also licensed as [BSD 3-clause](https://opensource.org/licenses/BSD-3-Clause). From 18725dbb26dcff9c902f69c0f4ec40e733c1abb2 Mon Sep 17 00:00:00 2001 From: James Melville Date: Sun, 11 Aug 2024 21:26:34 -0700 Subject: [PATCH 26/26] Time for a new version --- smallvis/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smallvis/DESCRIPTION b/smallvis/DESCRIPTION index 7470b56..1b1e135 100644 --- a/smallvis/DESCRIPTION +++ b/smallvis/DESCRIPTION @@ -1,7 +1,7 @@ Package: smallvis Type: Package Title: Small Scale Neighborhood Embedding Algorithms -Version: 0.0.1.9001 +Version: 0.0.2.9001 Authors@R: person("James", "Melville", email = "jlmelville@gmail.com", role = c("aut", "cre")) Description: Neighborhood embedding methods for small scale visualization, such
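
As a closing note on patch 24 above: `parallel_for` in `src/threads.h` splits
`[0, N)` into contiguous chunks and uses the arity trait to pick an overload,
so a worker only receives a thread id when it asks for one. A standalone
sketch of calling code follows. The worker lambdas and the sum reduction are
illustrative only, not package code; it assumes `threads.h` is on the include
path and that you link with `-pthread`:

```cpp
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

#include "threads.h" // smallvis::parallel_for and the arity-detection trait

int main() {
  const std::size_t N = 1000;
  const std::size_t n_threads = 4;
  std::vector<double> x(N, 1.0);

  // Three-argument worker (start, end, thread_id): the thread id selects a
  // per-thread accumulator, so the reduction needs no locking.
  std::vector<double> partial(n_threads, 0.0);
  smallvis::parallel_for(
      N, n_threads, [&](std::size_t start, std::size_t end, std::size_t t) {
        for (std::size_t i = start; i < end; ++i) {
          partial[t] += x[i];
        }
      });
  std::cout << "sum = "
            << std::accumulate(partial.begin(), partial.end(), 0.0) << "\n";

  // Two-argument worker (start, end): disjoint writes need no thread id, so
  // the trait dispatches to the two-argument overload automatically.
  std::vector<double> y(N);
  smallvis::parallel_for(N, n_threads,
                         [&](std::size_t start, std::size_t end) {
                           for (std::size_t i = start; i < end; ++i) {
                             y[i] = 2.0 * x[i];
                           }
                         });
  std::cout << "y[0] = " << y[0] << "\n";
}
```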