diff --git a/DESCRIPTION b/DESCRIPTION index 122ac97..0d215b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,5 +44,6 @@ Suggests: pkgdown, testthat, forcats, - patchwork + patchwork, + future.apply Language: en-GB diff --git a/R/calc-squintability.R b/R/calc-squintability.R index 1147863..3f3cb66 100644 --- a/R/calc-squintability.R +++ b/R/calc-squintability.R @@ -35,17 +35,7 @@ #' @examples #' # define the holes index as per tourr::holes #' library(GpGp) -#' holes <- function() { -#' function(mat) { -#' n <- nrow(mat) -#' d <- ncol(mat) -#' -#' num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) -#' den <- 1 - exp(-d / 2) -#' -#' num / den -#' } -#' } +#' library(tourr) #' basis_smoothness <- sample_bases(idx = "holes") #' calc_smoothness(basis_smoothness) #' basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5) diff --git a/README.Rmd b/README.Rmd index 3ce304f..5cff2cd 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,5 +1,6 @@ --- output: github_document +bibliography: '`r system.file("reference.bib", package = "ferrn")`' editor_options: chunk_output_type: console --- @@ -21,7 +22,7 @@ knitr::opts_chunk$set( [![R build status](https://github.com/huizezhang-sherry/ferrn/workflows/R-CMD-check/badge.svg)](https://github.com/huizezhang-sherry/ferrn/actions) -The **ferrn** package extracts key components in the data object collected by the guided tour optimisation, and produces diagnostic plots. An associated paper can be found at . +The **ferrn** package extracts key components from the data object collected during projection pursuit (PP) guided tour optimisation, produces diagnostic plots, and calculates PP index scores. 
## Installation @@ -33,22 +34,22 @@ remotes::install_github("huizezhang-sherry/ferrn") ``` -## Usage +## Visualise PP optimisation -To extract the data object from a guided tour, assign the `annimate_xx()` function a name: +The data object collected during a PP optimisation can be obtained by assigning the `tourr::animate_xx()` function a name. In the following example, the projection pursuit is finding the best projection basis that can detect multi-modality for the `boa5` dataset using the `holes()` index function and the optimiser `search_better`: ```{r eval = FALSE} set.seed(123456) holes_1d_better <- animate_dist( ferrn::boa5, - tour_path = guided_tour(holes(), d = 1, - search_f = search_better), + tour_path = guided_tour(holes(), d = 1, search_f = search_better), rescale = FALSE) +holes_1d_better ``` -The above code will collect data from the 1D animation on `boa5` dataset, a simulated data in the `ferrn` package. +The data structure includes the `basis` sampled by the optimiser, their corresponding index values (`index_val`), an `information` tag explaining the optimisation states, and the optimisation `method` used (`search_better`). The variables `tries` and `loop` describe the number of iterations and samples in the optimisation process, respectively. The variable `id` serves as the global identifier. 
-The best projection basis found by the projection pursuit algorithm can be extracted via +The best projection basis can be extracted via ```{r get-best} library(ferrn) @@ -58,8 +59,7 @@ holes_1d_better %>% get_best() %>% pull(basis) %>% .[[1]] holes_1d_better %>% get_best() %>% pull(index_val) ``` - -Trace plot for viewing the optimisation progression with botanical palette: +The trace plot can be used to view the optimisation progression: ```{r trace-plot} holes_1d_better %>% @@ -67,8 +67,7 @@ holes_1d_better %>% scale_color_continuous_botanical() ``` -Compare two algorithms via plotting the projection bases on the reduced PCA space: - +Different optimisers can be compared by plotting their projection bases on the reduced PCA space. Here `holes_1d_geo` is the data obtained from the same PP problem as `holes_1d_better` introduced above, but with a `search_geodesic` optimiser. The 5 $\times$ 1 bases from the two datasets are first reduced to 2D via PCA, and then plotted to the PCA space. (PP bases are ortho-normal and the space for $n \times 1$ bases is an $n$-d sphere, hence a circle when projected into 2D.) ```{r pca-plot} bind_rows(holes_1d_geo, holes_1d_better) %>% @@ -78,8 +77,7 @@ bind_rows(holes_1d_geo, holes_1d_better) %>% scale_color_discrete_botanical() ``` - -View the projection bases on its original 5-D space via tour animation: +The same set of bases can be visualised in the original 5-D space via tour animation: ```{r tour-anim, eval = FALSE} bind_rows(holes_1d_geo, holes_1d_better) %>% @@ -114,3 +112,29 @@ render_gif(

+ +## Calculate PP index scores + +Properties of a PP index described in @laa_using_2020 include smoothness, squintability, flexibility, rotation invariance, and speed. Here implementations are provided to calculate smoothness and squintability scores. + +```{r} +# define the holes index as per tourr::holes +holes <- function() { + function(mat) { + n <- nrow(mat) + d <- ncol(mat) + + num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) + den <- 1 - exp(-d / 2) + + num / den + } +} + +basis_smoothness <- sample_bases(idx = "holes") +calc_smoothness(basis_smoothness) +basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5) +calc_squintability(basis_squint, method = "ks", bin_width = 0.01) +``` + +# Reference diff --git a/README.md b/README.md index 04736c0..7c774f0 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,10 @@ status](https://github.com/huizezhang-sherry/ferrn/workflows/R-CMD-check/badge.svg)](https://github.com/huizezhang-sherry/ferrn/actions) -The **ferrn** package extracts key components in the data object -collected by the guided tour optimisation, and produces diagnostic -plots. An associated paper can be found at +The **ferrn** package extracts key components from the data object +collected during projection pursuit (PP) guided tour optimisation, +produces diagnostic plots, and calculates PP index scores. An associated +paper can be found at <https://journal.r-project.org/archive/2021/RJ-2021-105/index.html>. ## Installation @@ -24,25 +25,31 @@ You can install the development version of ferrn from remotes::install_github("huizezhang-sherry/ferrn") ``` -## Usage +## Examples -To extract the data object from a guided tour, assign the -`annimate_xx()` function a name: +The data object collected during a PP optimisation can be obtained by +assigning the `tourr::animate_xx()` function a name. 
In the following +example, the projection pursuit is finding the best projection basis +that can detect multi-modality for the `boa5` dataset using the +`holes()` index function and the optimiser `search_better`: ``` r set.seed(123456) holes_1d_better <- animate_dist( ferrn::boa5, - tour_path = guided_tour(holes(), d = 1, - search_f = search_better), + tour_path = guided_tour(holes(), d = 1, search_f = search_better), rescale = FALSE) +holes_1d_better ``` -The above code will collect data from the 1D animation on `boa5` -dataset, a simulated data in the `ferrn` package. +The data structure includes the `basis` sampled by the optimiser, their +corresponding index values (`index_val`), an `information` tag +explaining the optimisation states, and the optimisation `method` used +(`search_better`). The variables `tries` and `loop` describe the number +of iterations and samples in the optimisation process, respectively. The +variable `id` serves as the global identifier. -The best projection basis found by the projection pursuit algorithm can -be extracted via +The best projection basis can be extracted via ``` r library(ferrn) @@ -63,8 +70,7 @@ holes_1d_better %>% get_best() %>% pull(index_val) #> [1] 0.9136095 ``` -Trace plot for viewing the optimisation progression with botanical -palette: +The trace plot can be used to view the optimisation progression: ``` r holes_1d_better %>% @@ -74,8 +80,13 @@ holes_1d_better %>% -Compare two algorithms via plotting the projection bases on the reduced -PCA space: +Different optimisers can be compared by plotting their projection bases +on the reduced PCA space. Here `holes_1d_geo` is the data obtained from +the same PP problem as `holes_1d_better` introduced above, but with a +`search_geodesic` optimiser. The 5 $\times$ 1 bases from the two +datasets are first reduced to 2D via PCA, and then plotted to the PCA +space. 
(PP bases are ortho-normal and the space for $n \times 1$ bases +is an $n$-d sphere, hence a circle when projected into 2D.) ``` r bind_rows(holes_1d_geo, holes_1d_better) %>% @@ -87,7 +98,8 @@ bind_rows(holes_1d_geo, holes_1d_better) %>% -View the projection bases on its original 5-D space via tour animation: +The same set of bases can be visualised in the original 5-D space via +tour animation: ``` r bind_rows(holes_1d_geo, holes_1d_better) %>% @@ -98,7 +110,36 @@ bind_rows(holes_1d_geo, holes_1d_better) %>% ```

- -

+ +``` r +# define the holes index as per tourr::holes +holes <- function() { + function(mat) { + n <- nrow(mat) + d <- ncol(mat) + + num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) + den <- 1 - exp(-d / 2) + + num / den + } +} + +basis_smoothness <- sample_bases(idx = "holes") +calc_smoothness(basis_smoothness) +#> # PP index: holes +#> # No. of bases: 300 [6 x 2] +#> variance range smoothness nugget convergence +#> +#> 1 0.00000672 18.1 1.03 1138. TRUE +basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5) +calc_squintability(basis_squint, method = "ks", bin_width = 0.01) +#> # PP index: holes +#> # No. of bases: 100 -> 17159 +#> # method: ks +#> max_x max_d squint +#> +#> 1 1.87 0.482 0.901 +``` diff --git a/docs/index.html b/docs/index.html index 1bd377e..eaf52c1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -84,7 +84,7 @@ -

The ferrn package extracts key components in the data object collected by the guided tour optimisation, and produces diagnostic plots. An associated paper can be found at https://journal.r-project.org/archive/2021/RJ-2021-105/index.html.

+

The ferrn package extracts key components from the data object collected during projection pursuit (PP) guided tour optimisation, produces diagnostic plots, and calculates PP index scores. An associated paper can be found at https://journal.r-project.org/archive/2021/RJ-2021-105/index.html.

Installation

@@ -94,18 +94,18 @@

Installationremotes::install_github("huizezhang-sherry/ferrn")

-

Usage +

Examples

-

To extract the data object from a guided tour, assign the annimate_xx() function a name:

+

The data object collected during a PP optimisation can be obtained by assigning the tourr::animate_xx() function a name. In the following example, the projection pursuit is finding the best projection basis that can detect multi-modality for the boa5 dataset using the holes() index function and the optimiser search_better:

 set.seed(123456)
 holes_1d_better <- animate_dist(
   ferrn::boa5,
-  tour_path = guided_tour(holes(), d = 1,
-                          search_f =  search_better), 
-  rescale = FALSE)
-

The above code will collect data from the 1D animation on boa5 dataset, a simulated data in the ferrn package.

-

The best projection basis found by the projection pursuit algorithm can be extracted via

+ tour_path = guided_tour(holes(), d = 1, search_f = search_better), + rescale = FALSE) +holes_1d_better
+

The data structure includes the basis sampled by the optimiser, their corresponding index values (index_val), an information tag explaining the optimisation states, and the optimisation method used (search_better). The variables tries and loop describe the number of iterations and samples in the optimisation process, respectively. The variable id serves as the global identifier.

+

The best projection basis can be extracted via

 library(ferrn)
 library(dplyr)
@@ -123,13 +123,13 @@ 

Usage #> [5,] 0.093725721 holes_1d_better %>% get_best() %>% pull(index_val) #> [1] 0.9136095

-

Trace plot for viewing the optimisation progression with botanical palette:

+

The trace plot can be used to view the optimisation progression:

-

Compare two algorithms via plotting the projection bases on the reduced PCA space:

+

Different optimisers can be compared by plotting their projection bases on the reduced PCA space. Here holes_1d_geo is the data obtained from the same PP problem as holes_1d_better introduced above, but with a search_geodesic optimiser. The 5 × 1 bases from the two datasets are first reduced to 2D via PCA, and then plotted to the PCA space. (PP bases are ortho-normal and the space for n × 1 bases is an n-d sphere, hence a circle when projected into 2D.)

 bind_rows(holes_1d_geo, holes_1d_better) %>%
   bind_theoretical(matrix(c(0, 1, 0, 0, 0), nrow = 5),
@@ -137,7 +137,7 @@ 

Usage explore_space_pca(group = method, details = TRUE) + scale_color_discrete_botanical()

-

View the projection bases on its original 5-D space via tour animation:

+

The same set of bases can be visualised in the original 5-D space via tour animation:

 bind_rows(holes_1d_geo, holes_1d_better) %>%
   explore_space_tour(flip = TRUE, group = method,
@@ -145,9 +145,36 @@ 

Usage max_frames = 20, point_size = 2, end_size = 5)

-

-

- +

+
+
+holes <- function() {
+ function(mat) {
+   n <- nrow(mat)
+   d <- ncol(mat)
+
+   num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2)))
+   den <- 1 - exp(-d / 2)
+
+   num / den
+ }
+}
+
+basis_smoothness <- sample_bases(idx = "holes")
+calc_smoothness(basis_smoothness)
+#> # PP index:     holes
+#> # No. of bases: 300 [6 x 2]
+#>     variance range smoothness nugget convergence
+#>        <dbl> <dbl>      <dbl>  <dbl> <lgl>      
+#> 1 0.00000672  18.1       1.03  1138. TRUE
+basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5)
+calc_squintability(basis_squint, method = "ks", bin_width = 0.01)
+#> # PP index:     holes
+#> # No. of bases: 100 -> 17159
+#> # method:       ks
+#>   max_x max_d squint
+#>   <dbl> <dbl>  <dbl>
+#> 1  1.87 0.482  0.901
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 1774d29..598d3da 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 3.1.1 pkgdown: 2.0.8 pkgdown_sha: ~ articles: {} -last_built: 2024-06-14T18:54Z +last_built: 2024-06-18T16:57Z urls: reference: https://huizezhang-sherry.github.io/ferrn/reference article: https://huizezhang-sherry.github.io/ferrn/articles diff --git a/docs/reference/figures/README-pca-plot-1.png b/docs/reference/figures/README-pca-plot-1.png index 646f38d..4d1d5cd 100644 Binary files a/docs/reference/figures/README-pca-plot-1.png and b/docs/reference/figures/README-pca-plot-1.png differ diff --git a/docs/reference/index.html b/docs/reference/index.html index 514b3a5..5eb11bf 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -185,7 +185,7 @@

Calculate projecti

-

calc_smoothness() print(<smoothness_res>) tbl_sum(<smoothness_res>) calc_squintability() fit_ks() fit_nls()

+

sample_bases() print(<basis_df>) tbl_sum(<basis_df>) calc_smoothness() print(<smoothness_res>) tbl_sum(<smoothness_res>) calc_squintability() print(<squintability_res>) tbl_sum(<squintability_res>) fit_ks() fit_nls()

Function to calculate smoothness and squintability

diff --git a/inst/reference.bib b/inst/reference.bib new file mode 100644 index 0000000..bf8d38b --- /dev/null +++ b/inst/reference.bib @@ -0,0 +1,25 @@ +@article{RJ-2021-105, + author = {Zhang, H. Sherry and Cook, Dianne and Laa, Ursula and Langrené, Nicolas and Menéndez, Patricia}, + title = {Visual Diagnostics for Constrained Optimisation with Application to Guided Tours}, + journal = {The R Journal}, + year = {2021}, + doi = {10.32614/RJ-2021-105}, + volume = {13}, + issue = {2}, + issn = {2073-4859}, + pages = {624-641} +} + +@article{laa_using_2020, + title = {Using tours to visually investigate properties of new projection pursuit indexes with application to problems in physics}, + volume = {35}, + issn = {1613-9658}, + doi = {10.1007/s00180-020-00954-8}, + number = {3}, + urldate = {2024-01-04}, + journal = {Computational Statistics}, + author = {Laa, Ursula and Cook, Dianne}, + month = sep, + year = {2020}, + pages = {1171--1205} +} diff --git a/man/figures/README-pca-plot-1.png b/man/figures/README-pca-plot-1.png index 646f38d..4d1d5cd 100644 Binary files a/man/figures/README-pca-plot-1.png and b/man/figures/README-pca-plot-1.png differ diff --git a/man/optim.Rd b/man/optim.Rd index e6c1e5f..799b143 100644 --- a/man/optim.Rd +++ b/man/optim.Rd @@ -114,17 +114,7 @@ Function to calculate smoothness and squintability \examples{ # define the holes index as per tourr::holes library(GpGp) -holes <- function() { - function(mat) { - n <- nrow(mat) - d <- ncol(mat) - - num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) - den <- 1 - exp(-d / 2) - - num / den - } -} +library(tourr) basis_smoothness <- sample_bases(idx = "holes") calc_smoothness(basis_smoothness) basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5)