Update automated tests & controls/inputs/outputs

bioRgeo · Apr 4, 2024 · 6295f8f · 6295f8f
1 parent a00e363
commit 6295f8f
Show file tree

Hide file tree

Showing 17 changed files with 1,481 additions and 594 deletions.
diff --git a/R/cut_tree.R b/R/cut_tree.R
@@ -200,7 +200,7 @@ cut_tree <- function(tree,
 
   arg_added <- list(...)
   if(inherits(tree, "bioregion.clusters")){
-    if (tree$name == "hierarchical_clustering") {
+    if (tree$name == "hclu_hierarclust") {
       cur.tree <- tree$algorithm$final.tree
       # Update args
       tree$args[c("n_clust", "cut_height", "find_h", "h_max", "h_min",
@@ -321,7 +321,7 @@ cut_tree <- function(tree,
     output_cut_height <- cut_height
   }
 
-  clusters <- knbclu(clusters, reorder = FALSE, method = "length")
+  clusters <- knbclu(clusters, reorder = TRUE, method = "length")
 
   if(inherits(tree, "bioregion.clusters")) {
     cur.tree$args$cut_height <- cut_height

diff --git a/R/generic_functions.R b/R/generic_functions.R
@@ -18,15 +18,15 @@ print.bioregion.clusters <- function(x, ...)
   # algorithm name -----
   cat("Clustering results for algorithm : ")
   cat(x$name, "\n")
-  if(x$name == "hierarchical_clustering") {
+  if(x$name == "hclu_hierarclust") {
     cat("\t(hierarchical clustering based on a dissimilarity matrix)\n")
   }
 
   # dataset characteristics -----
   cat(" - Number of sites: ", x$inputs$nb_sites, "\n")
 
   # methodological details -----
-  if(x$name == "hierarchical_clustering") {
+  if(x$name == "hclu_hierarclust") {
     cat(" - Name of dissimilarity metric: ",
         ifelse(is.null(x$args$index),
                "Undefined",
@@ -44,7 +44,7 @@ print.bioregion.clusters <- function(x, ...)
   if (inherits(x$clusters, "data.frame")) {
 
     # Further methodological details if hclust
-    if(x$name == "hierarchical_clustering") {
+    if(x$name == "hclu_hierarclust") {
       if(!is.null(x$args$n_clust))
       {
         cat(" - Number of clusters requested by the user: ",
@@ -97,7 +97,7 @@ print.bioregion.clusters <- function(x, ...)
                paste(nclust, collapse = " ")),
         "\n")
 
-    if(x$name == "hierarchical_clustering") {
+    if(x$name == "hclu_hierarclust") {
       if(x$args$find_h)
       {
         cat(" - Height of cut of the hierarchical tree:",
@@ -124,7 +124,7 @@ print.bioregion.clusters <- function(x, ...)
 #' @method plot bioregion.clusters
 plot.bioregion.clusters <- function(x, ...)
 {
-  if(x$name == ("hierarchical_clustering"))
+  if(x$name == ("hclu_hierarclust"))
   {
     args <- list(...)
     # Changing default arguments for hclust plot

diff --git a/R/hclu_diana.R b/R/hclu_diana.R
@@ -18,24 +18,25 @@
 #' @param index name or number of the dissimilarity column to use. By default, 
 #' the third column name of `dissimilarity` is used.
 #'  
-#' @param n_clust an integer or a vector of integers indicating the number of
+#' @param n_clust an `integer` or an `integer` vector indicating the number of
 #' clusters to be obtained from the hierarchical tree, or the output from
 #' [partition_metrics]. Should not be used at the same time as
 #' `cut_height`.
 #' 
-#' @param cut_height a numeric vector indicating the height(s) at which the
+#' @param cut_height a `numeric` vector indicating the height(s) at which the
 #' tree should be cut. Should not be used at the same time as `n_clust`.
 #' 
-#' @param find_h a boolean indicating if the height of cut should be found for
+#' @param find_h a `boolean` indicating if the height of cut should be found for
 #' the requested `n_clust`.
 #' 
-#' @param h_max a numeric indicating the maximum possible tree height for
+#' @param h_max a `numeric` indicating the maximum possible tree height for
 #' the chosen `index`.
 #' 
-#' @param h_min a numeric indicating the minimum possible height in the tree
+#' @param h_min a `numeric` indicating the minimum possible height in the tree
 #' for the chosen `index`.
 #' 
 #' @details
+#' The function is based on [diana][cluster::diana].
 #' Chapter 6 of Kaufman and Rousseeuw (1990) fully details the functioning of
 #' the diana algorithm.
 #'
@@ -44,12 +45,12 @@
 #' @return
 #' A `list` of class `bioregion.clusters` with five slots:
 #' \enumerate{
-#' \item{**name**: `character string` containing the name of the algorithm}
+#' \item{**name**: `character` containing the name of the algorithm}
 #' \item{**args**: `list` of input arguments as provided by the user}
 #' \item{**inputs**: `list` of characteristics of the clustering process}
 #' \item{**algorithm**: `list` of all objects associated with the
 #'  clustering procedure, such as original cluster objects}
-#' \item{**clusters**: `data.frame` containing the clustering results}}#'
+#' \item{**clusters**: `data.frame` containing the clustering results}}
 #'
 #' @references
 #' \insertRef{Kaufman2009}{bioregion}
@@ -88,118 +89,91 @@ hclu_diana <- function(dissimilarity,
                        h_min = 0){
 
   # 1. Controls ---------------------------------------------------------------
-  if(inherits(dissimilarity, "bioregion.pairwise.metric")){
-    if(attr(dissimilarity, "type") == "similarity") {
-      stop("dissimilarity seems to be a similarity object.
-         hclu_hierarclust() should be applied on dissimilarity, not
-         similarities.
-         Use similarity_to_dissimilarity() before using hclu_hierarclust()")
-    }
-    if(is.numeric(index)){
-      index <- names(dissimilarity)[index]
-    }
-    if(!(index %in% colnames(dissimilarity))) {
-      stop("Argument index should be one of the column names of dissimilarity")
-    }
-
-  } else if(!any(inherits(dissimilarity, "bioregion.pairwise.metric"),
-                 inherits(dissimilarity, "dist"))){
-    if(is.numeric(index)) {
-      index <- names(dissimilarity)[index]
-    } 
-    if(is.null(index) || !(index %in% colnames(dissimilarity))) {
-      stop("dissimilarity is not a bioregion.pairwise.metric object, a
-           dissimilarity matrix (class dist) or a data.frame with at least 3
-           columns (site1, site2, and your dissimilarity index).")
+  controls(args = NULL, data = dissimilarity, type = "input_nhandhclu")
+  if(!inherits(dissimilarity, "dist")){
+    controls(args = NULL, data = dissimilarity, type = "input_dissimilarity")
+    controls(args = NULL, data = dissimilarity, 
+             type = "input_data_frame_nhandhclu")
+    controls(args = index, data = dissimilarity, type = "input_net_index")
+    net <- dissimilarity
+    net[, 3] <- net[, index]
+    net <- net[, 1:3]
+    controls(args = NULL, data = net, type = "input_net_index_value")
+    dist.obj <- stats::as.dist(
+      net_to_mat(net,
+                 weight = TRUE, squared = TRUE, symmetrical = TRUE))
+  } else {
+    controls(args = NULL, data = dissimilarity, type = "input_dist")
+    dist.obj <- dissimilarity
+    if(is.null(labels(dist.obj))){
+      attr(dist.obj, "Labels") <- paste0(1:attr(dist.obj, "Size"))
+      message("No labels detected, they have been assigned automatically.")
     }
   }
 
   if(!is.null(n_clust)) {
     if(is.numeric(n_clust)) {
-      if(any(!(n_clust %% 1 == 0))) {
-        stop("n_clust must an integer or a vector of integers determining the
-             number of clusters.")
-      }
+      controls(args = n_clust, data = NULL, 
+               type = "strict_positive_integer_vector")
     } else if(inherits(n_clust, "bioregion.partition.metrics")){
       if(!is.null(n_clust$algorithm$optimal_nb_clusters)) {
         n_clust <- n_clust$algorithm$optimal_nb_clusters
       } else {
         stop("n_clust does not have an optimal number of clusters. Did you
-        specify partition_optimisation = TRUE in partition_metrics()?")
+        specify partition_optimisation = TRUE in partition_metrics()?", 
+             call. = FALSE)
       }
     } else{
       stop("n_clust must be one of those:
         * an integer determining the number of clusters
         * a vector of integers determining the numbers of clusters for each cut
-        * the output from partition_metrics()")
+        * the output from partition_metrics()", 
+           call. = FALSE)
     }
     if(!is.null(cut_height)){
       stop("Please provide either n_clust or cut_height, but not both at the
-           same time.")
+           same time.", 
+           call. = FALSE)
     }
   }
-
   if(!is.null(cut_height)){
-    if(!is.numeric(cut_height) || any(cut_height < 0)){
-      stop("cut_height must be a positive integer.")
-    }
-  }
-
-  if(!is.logical(find_h)){
-    stop("find_h must be a Boolean.")
+    controls(args = cut_height, data = NULL, type = "positive_numeric_vector")
   }
-
-  if(!is.numeric(h_max) || h_max < 0){
-    stop("h_max must be a positive integer.")
-  }
-
-  if(!is.numeric(h_min) || h_min < 0){
-    stop("h_min must be a positive integer.")
-  }
-
+  controls(args = find_h, data = NULL, type = "boolean")
+  controls(args = h_min, data = NULL, type = "positive_numeric")
+  controls(args = h_max, data = NULL, type = "positive_numeric")
   if(h_min > h_max){
     stop("h_min must be inferior to h_max.")
   }
 
   # 2. Function ---------------------------------------------------------------
-  # Convert dissimilarity into a dist object
-  if(!inherits(dissimilarity, "dist")){
-    # dist.obj <- .dfToDist(dissimilarity, metric = index)
-    dist.obj <- stats::as.dist(
-      net_to_mat(dissimilarity[, c(colnames(dissimilarity)[1:2], index)],
-                 weight = TRUE, squared = TRUE, symmetrical = TRUE))
-
-  } else {
-    dist.obj <- dissimilarity
-  }
-
   # Output of the function
-  outputs <- list(name = "divisive_hierarchical_clustering")
+  outputs <- list(name = "hclu_diana")
 
   # Adding dynamic_tree_cut = FALSE for compatibility with generic functions
   dynamic_tree_cut <- FALSE
   outputs$args <- list(index = index,
-                       # method = method,
-                       # randomize = randomize,
-                       # n_runs = n_runs,
-                       # optimal_tree_method = optimal_tree_method,
                        n_clust = n_clust,
                        cut_height = cut_height,
                        find_h = find_h,
                        h_max = h_max,
                        h_min = h_min,
                        dynamic_tree_cut = dynamic_tree_cut)
 
-  outputs$inputs <- list(nb_sites = attr(dist.obj, "Size"))
+  outputs$inputs <- list(bipartite = FALSE,
+                         weight = TRUE,
+                         pairwise = TRUE,
+                         pairwise_metric = ifelse(!inherits(dissimilarity, 
+                                                            "dist"), 
+                                                  ifelse(is.numeric(index), 
+                                                         names(net)[3], index), 
+                                                  NA),
+                         dissimilarity = TRUE,
+                         nb_sites = attr(dist.obj, "Size"))
 
   # DIANA clustering
   diana_clust <- cluster::diana(dist.obj,
                                 diss = inherits(dist.obj, "dist"),
-                                # metric = "euclidean",
-                                # stand = FALSE,
-                                # stop.at.k = FALSE,
-                                # keep.diss = n < 100,
-                                # keep.data = !diss,
                                 trace.lev = 0)
 
   outputs$algorithm$final.tree <- diana_clust