Skip to content

Commit

Permalink
add bh param
Browse files Browse the repository at this point in the history
  • Loading branch information
jlmelville committed Aug 31, 2024
1 parent 22908ea commit e426f6f
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 20 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,11 @@ library(smallvis)
# Automatically plots the results during optimization
tsne_iris <- smallvis(iris, perplexity = 25, verbose = TRUE)

# Barnes-Hut:
bhtsne_iris <- smallvis(iris, perplexity = 25, method = "bhtsne", theta = 0.8)

# Barnes-Hut recommended settings:
bhtsne_iris <- smallvis(iris, bh = TRUE, n_threads = 4, perplexity = 30,
nn = "approximate", inp_kernel = "knn", theta = 1.0,
exaggeration_factor = 12, stop_lying_iter = 250,
Y_init = "spca")

# Using a custom epoch_callback
uniq_spec <- unique(iris$Species)
Expand Down
31 changes: 26 additions & 5 deletions smallvis/R/smallvis.R
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,14 @@
#' @param theta Barnes-Hut approximation accuracy. Default is \code{1.0} which
#' more or less corresponds to the default degree of approximation used in
#' the \code{Rtsne} package. Set to 0.0 for exact t-SNE. Applies only for
#' \code{method = "bhtsne"}.
#' \code{bh = TRUE}.
#' @param bh If \code{TRUE}, use the Barnes-Hut method to approximate
#' repulsive interactions (van der Maaten, 2014), which allows neighbor
#' embedding methods to scale to larger datasets. For large datasets, you will
#' also want to set \code{inp_kernel} to use only k-nearest neighbors for the
#' input similarities, i.e. one of \code{"nngaussian"} or \code{"knn"}.
#' You may also want to set \code{nn = "approximate"}. Not all methods support
#' \code{bh = TRUE}.
#' @param inp_kernel For t-SNE-like methods, determines how the input
#' similarities are calculated. Possible values are:
#' \itemize{
Expand Down Expand Up @@ -741,6 +748,13 @@
#' # Also late exaggeration to improve cluster separation
#' tsne_late_ex <- smallvis(iris, exaggeration_factor = 4, stop_lying_iter = 100,
#' late_exaggeration_factor = 1.5, start_late_lying_iter = 900)
#'
#' # Barnes-Hut, approximate nearest neighbors, multi-threading settings
#' # recommended for large datasets
#' bhtsne_iris <- smallvis(iris, bh = TRUE, n_threads = 4, perplexity = 30,
#' nn = "approximate", inp_kernel = "knn",
#' exaggeration_factor = 12, stop_lying_iter = 250,
#' Y_init = "spca")
#' }
#' @references
#' Belkin, M., & Niyogi, P. (2002).
Expand Down Expand Up @@ -903,6 +917,7 @@ smallvis <- function(X,
ret_extra = FALSE,
n_threads = 0,
use_cpp = FALSE,
bh = FALSE,
theta = 1.0,
eps = .Machine$double.eps,
inp_kernel = NULL,
Expand All @@ -927,7 +942,8 @@ smallvis <- function(X,
use_cpp,
theta = theta,
inp_kernel = inp_kernel,
nn = nn
nn = nn,
bh = bh
)

if (exaggeration_factor != 1) {
Expand Down Expand Up @@ -1436,7 +1452,8 @@ create_cost <- function(method,
use_cpp,
theta,
inp_kernel,
nn) {
nn,
bh) {
method_names <- c(
"tsne",
"largevis",
Expand Down Expand Up @@ -1485,11 +1502,15 @@ create_cost <- function(method,
"jssne",
"gsne",
"abssne",
"bhssne",
"bhtsne"
"bhssne"
)
if (is.character(method)) {
method <- match.arg(tolower(method), method_names)

if (method == "tsne" && bh) {
method <- "bhtsne"
}

cost_fn <- switch(method,
tsne = tsne(
perplexity = perplexity,
Expand Down
18 changes: 17 additions & 1 deletion smallvis/man/smallvis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 35 additions & 7 deletions smallvis/tests/testthat/helper_api.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,36 @@
expect_api <- function(method, Y, cost, X = iris10, use_cpp = FALSE, perplexity = 5) {
res <- smallvis(X, Y_init = iris10_Y,
method = method, eta = 0.1,
perplexity = perplexity, epoch_callback = NULL, verbose = FALSE,
ret_extra = TRUE, use_cpp = use_cpp)
expect_equal(res$Y, c2y(Y), tolerance = 1e-3, info = paste0(method[[1]], " Y"))
expect_equal(final_cost(res), cost, tolerance = 1e-4, info = paste0(method[[1]], " cost"))
# Test helper: run smallvis with fixed settings and compare the outcome against
# reference values.
#
# Calls smallvis on X with Y_init = iris10_Y, eta = 0.1, no epoch callback,
# verbose output off and ret_extra = TRUE, forwarding method, perplexity,
# use_cpp, theta, bh, nn and inp_kernel unchanged. Two expectations follow:
#   * the Y element of the result equals c2y(Y) to a tolerance of 1e-3;
#   * final_cost() of the result equals cost to a tolerance of 1e-4.
# The first element of method is used to label each expectation.
#
# NOTE(review): iris10, iris10_Y, c2y and final_cost are defined outside this
# helper; c2y presumably reshapes the flat Y vector into a coordinate matrix --
# confirm against its definition.
expect_api <- function(method,
                       Y,
                       cost,
                       X = iris10,
                       use_cpp = FALSE,
                       perplexity = 5,
                       theta = 0.0,
                       nn = "exact",
                       bh = FALSE,
                       inp_kernel = "gaussian") {
  # Caller-controlled settings first, then the settings pinned by this helper.
  fit <- smallvis(
    X,
    method = method,
    perplexity = perplexity,
    inp_kernel = inp_kernel,
    nn = nn,
    bh = bh,
    theta = theta,
    use_cpp = use_cpp,
    Y_init = iris10_Y,
    eta = 0.1,
    epoch_callback = NULL,
    verbose = FALSE,
    ret_extra = TRUE
  )

  # Output coordinates must match the reference layout.
  expect_equal(
    fit$Y, c2y(Y),
    tolerance = 1e-3,
    info = paste0(method[[1]], " Y")
  )
  # Final cost must match the reference cost.
  expect_equal(
    final_cost(fit), cost,
    tolerance = 1e-4,
    info = paste0(method[[1]], " cost")
  )
}
12 changes: 8 additions & 4 deletions smallvis/tests/testthat/test_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -520,15 +520,19 @@ test_that("Miscellany", {
1.967, 4.82, -4.913, 0.5231, 0.837, 0.02866, -1.871, -1.438,
1.273, 0.6842, -1.849, -0.6084, 2.42), X = ui10, use_cpp = TRUE,
cost = 0.02485)
expect_api(method = list("bhtsne", perplexity = 3, inp_kernel = "nngaussian", nn = "exact", theta = 0.0),
expect_api(method = "tsne", X = ui10, use_cpp = TRUE, bh = TRUE,
perplexity = 3, inp_kernel = "nngaussian", nn = "exact",
theta = 0.0,
Y = c(-12.52, 4.749, -1.071, 10.54, 2.669, -10.44, 1.578, 5.019,
13.21, -13.73, 3.153, 3.662, 1.081, -7.454, -3.565, 4.962,
2.446, -5.197, -6.536, 7.447), X = ui10, use_cpp = TRUE, perplexity = 3,
2.446, -5.197, -6.536, 7.447),
cost = 0.05768)
expect_api(method = list("bhtsne", perplexity = 3, inp_kernel = "nngaussian", nn = "exact", theta = 0.5),
expect_api(method = "tsne", X = ui10, use_cpp = TRUE, bh = TRUE,
perplexity = 3, inp_kernel = "nngaussian", nn = "exact",
theta = 0.5,
Y = c(-11.78, 4.035, -1.215, 10.35, 2.76, -9.966, 1.177, 5.08,
12.8, -13.23, 2.413, 3.66, 0.7804, -6.291, -3.386, 4.257,
2.233, -4.78, -5.25, 6.362), X = ui10, use_cpp = TRUE, perplexity = 3,
2.233, -4.78, -5.25, 6.362),
cost = 0.05564)

})
Expand Down

0 comments on commit e426f6f

Please sign in to comment.