From 4d188761e333df37971f07150f0b4871e6524e3f Mon Sep 17 00:00:00 2001
From: huizezhang-sherry
-
-
The ferrn package extracts key components in the data object collected by the guided tour optimisation, and produces diagnostic plots. An associated paper can be found at https://journal.r-project.org/archive/2021/RJ-2021-105/index.html. The ferrn package extracts key components from the data object collected during projection pursuit (PP) guided tour optimisation, produces diagnostic plots, and calculates PP index scores. An associated paper can be found at https://journal.r-project.org/archive/2021/RJ-2021-105/index.html. To extract the data object from a guided tour, assign the The data object collected during a PP optimisation can be obtained by assigning the The above code will collect data from the 1D animation on The best projection basis found by the projection pursuit algorithm can be extracted via The data structure includes the The best projection basis can be extracted via Trace plot for viewing the optimisation progression with botanical palette: The trace plot can be used to view the optimisation progression: Compare two algorithms via plotting the projection bases on the reduced PCA space: Different optimisers can be compared by plotting their projection bases on the reduced PCA space. Here View the projection bases on its original 5-D space via tour animation: The same set of bases can be visualised in the original 5-D space via tour animation:
-Installation
@@ -94,18 +94,18 @@ Installationremotes::install_github("huizezhang-sherry/ferrn")
Usage
+
Examples
-annimate_xx()
function a name:tourr::animate_xx()
function a name. In the following example, the projection pursuit is finding the best projection basis that can detect multi-modality for the boa5
dataset using the holes()
index function and the optimiser search_better
:
set.seed(123456)
holes_1d_better <- animate_dist(
ferrn::boa5,
- tour_path = guided_tour(holes(), d = 1,
- search_f = search_better),
- rescale = FALSE)
boa5
dataset, a simulated dataset in the ferrn
package.basis
sampled by the optimiser, their corresponding index values (index_val
), an information
tag explaining the optimisation states, and the optimisation method
used (search_better
). The variables tries
and loop
describe the number of iterations and samples in the optimisation process, respectively. The variable id
serves as the global identifier.
library(ferrn)
library(dplyr)
@@ -123,13 +123,13 @@
Usage
#> [5,] 0.093725721
holes_1d_better %>% get_best() %>% pull(index_val)
#> [1] 0.9136095
holes_1d_better %>%
explore_trace_interp() +
scale_color_continuous_botanical()
holes_1d_geo
is the data obtained from the same PP problem as holes_1d_better
introduced above, but with a search_geodesic
optimiser. The 5 × 1 bases from the two datasets are first reduced to 2D via PCA, and then plotted in the PCA space. (PP bases are orthonormal and the space of n × 1 bases is an n-d sphere, hence a circle when projected into 2D.)
bind_rows(holes_1d_geo, holes_1d_better) %>%
bind_theoretical(matrix(c(0, 1, 0, 0, 0), nrow = 5),
@@ -137,7 +137,7 @@
Usage
explore_space_pca(group = method, details = TRUE) +
scale_color_discrete_botanical()
bind_rows(holes_1d_geo, holes_1d_better) %>%
explore_space_tour(flip = TRUE, group = method,
@@ -145,9 +145,36 @@
Usage
max_frames = 20,
point_size = 2, end_size = 5)
+
+holes <- function() {  # factory: takes no arguments and returns the function below
+  function(mat) {  # mat must support nrow(), ncol(), rowSums() and `^`
+    n <- nrow(mat)  # number of rows of mat
+    d <- ncol(mat)  # number of columns of mat
+
+    num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2)))  # 1 minus the row-average of exp(-0.5 * squared row norm)
+    den <- 1 - exp(-d / 2)  # normalising constant; depends only on the column count d
+
+    num / den  # ratio is the value returned by the inner function
+  }
+}
+
+basis_smoothness <- sample_bases(idx = "holes")
+calc_smoothness(basis_smoothness)
+#> # PP index: holes
+#> # No. of bases: 300 [6 x 2]
+#> variance range smoothness nugget convergence
+#> <dbl> <dbl> <dbl> <dbl> <lgl>
+#> 1 0.00000672 18.1 1.03 1138. TRUE
+basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5)
+calc_squintability(basis_squint, method = "ks", bin_width = 0.01)
+#> # PP index: holes
+#> # No. of bases: 100 -> 17159
+#> # method: ks
+#> max_x max_d squint
+#> <dbl> <dbl> <dbl>
+#> 1 1.87 0.482 0.901
zE-Wkr6FE-iOXshc@?2u8*V&|}*$+_JKb`ju4ka@hFV}}EiTqxYY(wyE57xMoI z7LY9=lqed2%s%toEC88r90o2zcL=4gNF?{E)c-Ez14oboAc&qE;s5Urd{?k?pm2PT z6@U@{yB1g67yO5$AU)u}JNWIPW`S_HZ<-nZdtSB}5VV}A3@42Ae|H!s;e%pwKXNmD zWAgu8cBF5}O>;&{`R@+DMSa0$S96ym|0Qef|C=MwV|x|5>Hj6GJsKd3!+wF??7utw zzz25t|HZh%@h0Z~VMvAK+S M2P{ #8o0UsDs&J=`9=_lN9 Om4SnPeo(A_lb0M6Ep-UFR8&;7P?#UM?%97L^h$t9Q62#rdtPuW zLP*zO!6->bDblbhGlSn!Il*0&FmS;rQbbW2beyV|K5=#H+izT<{{rAmG$;}a08Ln+ zuQq3h7i>~O`Oi`(Z&K?^99u972aW#JMBGMFcd|esD %+)gcmZiVu+CNVl|V%u|)$|q8Q=ml>(y@>dpRD2N#ez zimcr<3r9f+qXa(HKolOIpyx4)-;mp04$y!U`U4X{ESOAh*1jVS$q$`QG6}94Zzhx$ z%8Z{zAH$!jCTBbyc_kf!20VLAC`~!yCxlq6oz_tUOi@eyQ?MOPq#}<#h3#@6I*N}8 z(6exIua#8DMx3wq-qGQqL+NhVq*GUvaY9> zjK&m$UEXw*4>;>~FL2W?+& ~)$%ZW6~^$pJ} zY4$>-C>T(Zg2v{Yz(I-p wAE 2m(mn zw40bSMmr kI|13jqo? zgK|#cEG$vokXKZq9ZiVF*3FW0;>HyZP}ct%P!ZvFG#(~>& ;O154Vk}}j%Ojhg2=x>vmx(;LgU2=Edc3@AadJE zfnVU%{7rp)c!!T)7|uVy-I3}gZ?7{)c3fxkFq{*@3B8@|&&KYsczY@+Q1)vE#qjZp zr)%Jw0w!-q8)gWYWlgC?LWl(cff=z~(vs3sLrETrpWf)pSAgLl!oDjP{ObnBXg^?q zBcc8qN{wnigJc~a9UT02c|7~0769=l2O*lNsC-P1^C#~`5F_LQ&K4Z V8r^ONEBBcL$GtfLBJg$; zGqy@hG}t6iO$JnsfiN2ES11d?&R>b)vF%a72&&L#UB&Pflz$_wg5I=5YX- zmP6KvGLw_3uHujra1UeK_W&n}hsUcU4Tb_VG(!bv*q7FT3;^&C)ba_pjo=US7z{_v z(E)GX`fST#FPcaXF7wRJRBN=4epg#ej}(H!iji?ai6I~=5H%acw+8I}e&iRkIH4IB zg@?$oW6K8aH$Q6!0-)c-A|QVXsS6Ylsz8wsoVHthfRypr_Hpg+;M6+E0|r7enNRt^ z>xAuV*XvmWEaGeQ07FC6?IHnw0QfVvM~0JO5dh1R@Wpygja~L25duyhDt3rSBmxlJ z)^}5poIZx#bTPs-h+c4SqnvwxNbI*o=l>0e*tg!W7Ya}AV#Ev}6itj8d=?X;zVe_c zTQdHgi4?^R+#lUWb}9z`Dco*jUvS3$(6C(R7F6PWK)Zi>jPGS=