From 294bba8f6f0a9fc18dd631a02f0dda072f6c87ed Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 17 Sep 2023 11:07:17 +0200 Subject: [PATCH] PCA with n = 1 (#904) --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ R/principal_components.R | 27 +++++++++++---------------- man/principal_components.Rd | 15 ++++++++------- man/reduce_parameters.Rd | 15 ++++++++------- tests/testthat/test-pca.R | 21 ++++++++++++--------- 6 files changed, 47 insertions(+), 40 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d77d1950d..cdca7f002 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.21.2 +Version: 0.21.2.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index c52c21fde..d8bcfe9f6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# parameters 0.21.3 + +## Changes + +* `principal_components()` and `factor_analysis()` now also work when argument + `n = 1`. + # parameters 0.21.2 ## Changes diff --git a/R/principal_components.R b/R/principal_components.R index 371befa30..2166507eb 100644 --- a/R/principal_components.R +++ b/R/principal_components.R @@ -7,13 +7,14 @@ #' Details section). #' #' @param x A data frame or a statistical model. -#' @param n Number of components to extract. If `n="all"`, then `n` is -#' set as the number of variables minus 1 (`ncol(x)-1`). If -#' `n="auto"` (default) or `n=NULL`, the number of components is -#' selected through [`n_factors()`] resp. [`n_components()`]. -#' In [`reduce_parameters()`], can also be `"max"`, in which case -#' it will select all the components that are maximally pseudo-loaded (i.e., -#' correlated) by at least one variable. +#' @param n Number of components to extract. If `n="all"`, then `n` is set as +#' the number of variables minus 1 (`ncol(x)-1`). If `n="auto"` (default) or +#' `n=NULL`, the number of components is selected through [`n_factors()`] resp. +#' [`n_components()`]. 
Else, if `n` is a number, `n` components are extracted. +#' If `n` exceeds the number of variables in the data, it is automatically set to +#' the maximum number (i.e. `ncol(x)`). In [`reduce_parameters()`], can also +#' be `"max"`, in which case it will select all the components that are +#' maximally pseudo-loaded (i.e., correlated) by at least one variable. #' @param rotation If not `"none"`, the PCA / FA will be computed using the #' **psych** package. Possible options include `"varimax"`, #' `"quartimax"`, `"promax"`, `"oblimin"`, `"simplimax"`, @@ -418,15 +419,9 @@ principal_components.data.frame <- function(x, } else if (n == "all") { n <- ncol(x) - 1 } else if (n >= ncol(x)) { - n <- ncol(x) - 1 - } - - ## TODO: the next if-statement was removed by Dom, but this breaks - ## performance code. Need to check, so we for now add this back - - # sanity check - we need at least two factors - if (n < 2 && ncol(x) >= 2) { - n <- 2 + n <- ncol(x) + } else if (n < 1) { + n <- 1 } n } diff --git a/man/principal_components.Rd b/man/principal_components.Rd index ebfbbdcd2..c0a644356 100644 --- a/man/principal_components.Rd +++ b/man/principal_components.Rd @@ -53,13 +53,14 @@ closest_component(pca_results) \arguments{ \item{x}{A data frame or a statistical model.} -\item{n}{Number of components to extract. If \code{n="all"}, then \code{n} is -set as the number of variables minus 1 (\code{ncol(x)-1}). If -\code{n="auto"} (default) or \code{n=NULL}, the number of components is -selected through \code{\link[=n_factors]{n_factors()}} resp. \code{\link[=n_components]{n_components()}}. -In \code{\link[=reduce_parameters]{reduce_parameters()}}, can also be \code{"max"}, in which case -it will select all the components that are maximally pseudo-loaded (i.e., -correlated) by at least one variable.} +\item{n}{Number of components to extract. If \code{n="all"}, then \code{n} is set as +the number of variables minus 1 (\code{ncol(x)-1}). 
If \code{n="auto"} (default) or +\code{n=NULL}, the number of components is selected through \code{\link[=n_factors]{n_factors()}} resp. +\code{\link[=n_components]{n_components()}}. Else, if \code{n} is a number, \code{n} components are extracted. +If \code{n} exceeds the number of variables in the data, it is automatically set to +the maximum number (i.e. \code{ncol(x)}). In \code{\link[=reduce_parameters]{reduce_parameters()}}, can also +be \code{"max"}, in which case it will select all the components that are +maximally pseudo-loaded (i.e., correlated) by at least one variable.} \item{rotation}{If not \code{"none"}, the PCA / FA will be computed using the \strong{psych} package. Possible options include \code{"varimax"}, diff --git a/man/reduce_parameters.Rd b/man/reduce_parameters.Rd index 40c6ff827..a039544e7 100644 --- a/man/reduce_parameters.Rd +++ b/man/reduce_parameters.Rd @@ -15,13 +15,14 @@ reduce_data(x, method = "PCA", n = "max", distance = "euclidean", ...) \item{method}{The feature reduction method. Can be one of \code{"PCA"}, \code{"cMDS"}, \code{"DRR"}, \code{"ICA"} (see the 'Details' section).} -\item{n}{Number of components to extract. If \code{n="all"}, then \code{n} is -set as the number of variables minus 1 (\code{ncol(x)-1}). If -\code{n="auto"} (default) or \code{n=NULL}, the number of components is -selected through \code{\link[=n_factors]{n_factors()}} resp. \code{\link[=n_components]{n_components()}}. -In \code{\link[=reduce_parameters]{reduce_parameters()}}, can also be \code{"max"}, in which case -it will select all the components that are maximally pseudo-loaded (i.e., -correlated) by at least one variable.} +\item{n}{Number of components to extract. If \code{n="all"}, then \code{n} is set as +the number of variables minus 1 (\code{ncol(x)-1}). If \code{n="auto"} (default) or +\code{n=NULL}, the number of components is selected through \code{\link[=n_factors]{n_factors()}} resp. +\code{\link[=n_components]{n_components()}}. 
Else, if \code{n} is a number, \code{n} components are extracted. +If \code{n} exceeds the number of variables in the data, it is automatically set to +the maximum number (i.e. \code{ncol(x)}). In \code{\link[=reduce_parameters]{reduce_parameters()}}, can also +be \code{"max"}, in which case it will select all the components that are +maximally pseudo-loaded (i.e., correlated) by at least one variable.} \item{distance}{The distance measure to be used. Only applies when \code{method = "cMDS"}. This must be one of \code{"euclidean"}, \code{"maximum"}, diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index dd7791410..b6dba5bd1 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -19,10 +19,17 @@ test_that("principal_components", { tolerance = 0.01 ) - expect_equal( - colnames(x), - c("Variable", "RC1", "RC2", "Complexity", "Uniqueness", "MSA") - ) + expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness", "MSA")) +}) + + +test_that("principal_components, n", { + data(iris) + x <- parameters::principal_components(iris[1:4], n = 2) + expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) + + x <- parameters::principal_components(iris[1:4], n = 1) + expect_named(x, c("Variable", "PC1", "Complexity")) }) @@ -43,17 +50,13 @@ test_that("principal_components", { tolerance = 0.01 ) - expect_equal( - colnames(x), - c("Variable", "PC1", "PC2", "Complexity") - ) + expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) }) # predict ---------------------- # N.B tests will fail if `GPArotation` package is not installed -require("GPArotation", quietly = TRUE) d <- na.omit(psych::bfi[, 1:25]) model <- psych::fa(d, nfactors = 5) mp <- model_parameters(model, sort = TRUE, threshold = "max")