Merge pull request #25 from b-cubed-eu/update0.2.3

Update0.2.3
b-cubed-eu · Sep 12, 2024 · d1c4711 · d1c4711
2 parents 6e76d1d + 8d3eeb2
commit d1c4711
Show file tree

Hide file tree

Showing 9 changed files with 390 additions and 157 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -31,14 +31,14 @@ Imports:
     rnaturalearth,
     sf,
     stringr,
-    taxize,
     tibble,
     tidyr,
     units
 Suggests: 
     knitr,
     rmarkdown,
     rnaturalearthdata,
+    taxize,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -47,6 +47,7 @@ S3method(print,indicator_map)
 S3method(print,indicator_ts)
 S3method(print,processed_cube)
 S3method(print,processed_cube_dsinfo)
+S3method(print,sim_cube)
 export("%>%")
 export(ab_rarity_map)
 export(ab_rarity_ts)

diff --git a/R/constructor_functions.R b/R/constructor_functions.R
@@ -66,6 +66,47 @@ new_processed_cube <- function(x, grid_type) {
   }
 }
 
+#' @title 'sim_cube' S3 Constructor
+#'
+#' @description This function constructs a 'sim_cube' S3 object, a specialized data
+#' structure designed for analysis of simulated biodiversity data (e.g. from the gcube
+#' package) that lacks spatial information. It validates the input data cube, calculates
+#' essential summary information, and prepares the object for further use.
+#'
+#' @param x A tibble data cube containing simulated occurrence data.
+#'
+#' @return  A 'gcube' S3 object containing:
+#'   * **Summary statistics:** First and last year of data, number of species, number of observations.
+#'   * **Original data:** The input data cube.
+#'
+#' @noRd
+new_sim_cube <- function(x, grid_type) {
+  # check that x is a tibble and all necessary columns are present
+  stopifnot(tibble::is_tibble(x),
+            all(c("year",
+                  "taxonKey",
+                  "obs") %in% names(x)))
+    structure(list(first_year = min(x$year),
+                   last_year = max(x$year),
+                   coord_range = ifelse(("xcoord" %in% colnames(x) & "ycoord" %in% colnames(x)),
+                                        list("xmin" = min(x$xcoord),
+                                             "xmax" = max(x$xcoord),
+                                             "ymin" = min(x$ycoord),
+                                             "ymax" = max(x$ycoord)),
+                                        "Coordinates not provided"),
+                   num_cells = ifelse("cellCode" %in% colnames(x), length(unique(x$cellCode)), "No cell codes provided"),
+                   num_species = length(unique(x$taxonKey)),
+                   num_obs = sum(x$obs),
+                   kingdoms = ifelse("kingdom" %in% colnames(x), c(unique(x$kingdom)), "Data not present"),
+                   num_families = ifelse("family" %in% colnames(x), length(unique(x$family)), "Data not present"),
+                   grid_type = grid_type,
+                   if ("resolution" %in% colnames(x)) {
+                     resolutions = unique(x$resolution)
+                   },
+                   data = x),
+              class = "sim_cube")
+}
+
 
 #' @title 'indicator_ts' S3 Constructor
 #'

diff --git a/R/indicator_wrappers.R b/R/indicator_wrappers.R
@@ -759,6 +759,9 @@ spec_range_ts <- function(data, ...) {
 #'
 #' @export
 tax_distinct_map <- function(data, rows = 1, ...) {
+  if (!requireNamespace("taxize", quietly = TRUE)) {
+    stop("The package {taxize} is required for this action")
+  }
   compute_indicator_workflow(data,
                              type = "tax_distinct",
                              dim_type = "map",
@@ -786,6 +789,9 @@ tax_distinct_map <- function(data, rows = 1, ...) {
 #'
 #' @export
 tax_distinct_ts <- function(data, rows = 1, ...) {
+  if (!requireNamespace("taxize", quietly = TRUE)) {
+    stop("The package {taxize} is required for this action")
+  }
   compute_indicator_workflow(data,
                              type = "tax_distinct",
                              dim_type = "ts",

diff --git a/R/print_methods.R b/R/print_methods.R
@@ -21,8 +21,10 @@ print.indicator_ts <- function(x, n = 10, ...) {
   cat("Coordinate range represented:\n")
   print(x$coord_range)
   cat("\nNumber of species represented:", x$num_species, "\n")
+  if (!is.null(x$num_families)) {cat("Number of families represented:",
+                                     paste(x$num_families, collapse = ", "), "\n\n")}
   if (!is.null(x$kingdoms)) {cat("Kingdoms represented:", x$kingdoms, "\n")}
-  if (!is.null(x$families)) {cat("Families represented:", x$families, "\n")}
+ # if (!is.null(x$families)) {cat("Families represented:", x$families, "\n")}
   cat("\nFirst", n, "rows of data (use n = to show more):\n\n")
   print(x$data, n = n, ...)
 }
@@ -129,6 +131,37 @@ print.processed_cube_dsinfo <- function(x, n = 10, ...) {
   print(x$data, n = n, ...)
 }
 
+#' @title Print a Simulated Data Cube Object
+#'
+#' @description Provides a summary representation of a sim_cube object,
+#'   designed for user-friendly display in the console.
+#'
+#' @method print sim_cube
+#'
+#' @param x A sim_cube object.
+#' @param n Integer specifying the number of rows of cube data to display.
+#' @param ... Additional arguments.
+#'
+#'
+#' @export
+print.sim_cube <- function(x, n = 10, ...) {
+  cat("\nSimulated data cube for calculating biodiversity indicators\n\n")
+  cat("Date Range:", x$first_year, "-", x$last_year, "\n")
+  if (!is.null(x$resolutions)) {
+    cat("Single-resolution cube with cell size", x$resolutions, "^2\n")
+  }
+  cat("Number of cells:", x$num_cells, "\n")
+  cat("Grid reference system:", x$grid_type, "\n")
+  cat("Coordinate range:\n")
+  print(unlist(x$coord_range))
+  cat("\nTotal number of observations:", x$num_obs, "\n")
+  cat("Number of species represented:", x$num_species, "\n")
+  cat("Number of families represented:", paste(x$num_families, collapse = ", "), "\n\n")
+  cat("Kingdoms represented:", paste(x$kingdoms, collapse = ", "), "\n\n")
+  cat("First", n, "rows of data (use n = to show more):\n\n")
+  print(x$data, n = n, ...)
+}
+
 #' @title Print Available Indicators
 #' @description Provide a summary of indicators registered in the package.
 #'

diff --git a/R/process_cube_functions.R b/R/process_cube_functions.R
@@ -25,6 +25,9 @@
 #'   specified, uses the latest year present in the cube.
 #' @param grid_type Specify which grid reference system your cube uses. By default
 #'  the function will attempt to determine this automatically and return an error if it fails.
+#'  If you want to perform analysis on a cube with custom grid codes (e.g. output
+#'  from the gcube package) or a cube without grid codes, select 'custom' or 'none',
+#'  respectively.
 #' @param force_gridcode Force the function to assume a specific grid reference system.
 #'  This may cause unexpected downstream issues, so it is not recommended. If you are
 #'  getting errors related to grid cell codes, check to make sure they are valid.
@@ -80,7 +83,7 @@
 #' }
 #' @export
 process_cube <- function(cube_name,
-                         grid_type = c("automatic", "eea", "mgrs", "eqdgc"),
+                         grid_type = c("automatic", "eea", "mgrs", "eqdgc", "custom", "none"),
                          first_year = NULL,
                          last_year = NULL,
                          force_gridcode = FALSE,
@@ -101,13 +104,28 @@ process_cube <- function(cube_name,
                          cols_sex = NULL,
                          cols_lifeStage = NULL) {
 
-  # Read in data cube
-  occurrence_data <- readr::read_delim(
-    file = cube_name,
-    delim = "\t",
-    na = "",
-    show_col_types = FALSE
-  )
+ # data_type = match.arg(data_type)
+
+  if (is.character(cube_name) & length(cube_name == 1)) {
+
+    # Read in data cube
+    occurrence_data <- readr::read_delim(
+      file = cube_name,
+      delim = "\t",
+      na = "",
+      show_col_types = FALSE
+    )
+
+  } else if (inherits(cube_name, "data.frame")) {
+
+    # Read in data cube
+    occurrence_data <- tibble::as_tibble(cube_name)
+
+  } else {
+
+    stop("`cube_name` should be a file path or dataframe.")
+
+  }
 
   grid_type = match.arg(grid_type)
 
@@ -164,6 +182,35 @@ process_cube <- function(cube_name,
 
     }
 
+    # if the user has chosen 'custom' as a grid type...
+  } else if (grid_type == "custom") {
+
+    # check if the user has provided a name for the column containing grid cell codes
+    if (is.null(cols_cellCode)) {
+
+      stop("You have chosen custom grid type. Please provide the name of the column containing grid cell codes.")
+
+    }
+
+
+    # check that the column name they provided exists
+    if (!cols_cellCode %in% names(occurrence_data)) {
+
+      stop("The column name you provided for grid cell codes does not exist. Please double check that you spelled it correctly.")
+
+    }
+
+    # rename it to the default
+    occurrence_data <-
+      occurrence_data %>%
+      dplyr::rename(cellCode = cols_cellCode)
+
+    # if the user has chosen 'none' as a grid type...
+  } else if (grid_type == "none") {
+
+    # create dummy column full of zeros
+    #  occurrence_data$cellCode <- 0
+
     # if the user has specified a grid type...
   } else {
 
@@ -184,7 +231,7 @@ process_cube <- function(cube_name,
 
     }
 
-    if (force_gridcode == FALSE) {
+    if (force_gridcode == FALSE & grid_type!="none") {
 
       grid_type_test <- ifelse(grid_type == "eea", stringr::str_detect(occurrence_data[[cols_cellCode]], "^[0-9]{1,3}[km]{1,2}[EW]{1}[0-9]{2,7}[NS]{1}[0-9]{2,7}$"),
                                ifelse(grid_type == "mgrs", stringr::str_detect(occurrence_data[[cols_cellCode]], "^[0-9]{2}[A-Z]{3}[0-9]{0,10}$"),
@@ -249,8 +296,9 @@ process_cube <- function(cube_name,
   col_names <- data.frame("default" = unlist(col_names_defaultlist), "user" = unlist(col_names_userlist))
 
   # rename user-supplied column names to defaults expected by package functions
-  names(occurrence_data)[names(occurrence_data) %in% col_names[,2]] <-
-    col_names[,1][col_names[,2] %in% (names(occurrence_data))]
+  for (i in (which(names(occurrence_data) %in% col_names[,2]))) {
+    names(occurrence_data)[i] <- col_names[,1][which(col_names[,2] %in% names(occurrence_data)[i])]
+  }
 
   # for (i in 1:length(col_names_userlist)) {
   #
@@ -309,16 +357,34 @@ process_cube <- function(cube_name,
 
   }
 
+  essential_cols <- c("year",
+                      "occurrences",
+                      "minCoordinateUncertaintyInMeters",
+                      "minTemporalUncertainty",
+                      "kingdomKey",
+                      "familyKey",
+                      "speciesKey",
+                      "familyCount")
+  # make sure that essential columns are the correct type
+  occurrence_data <-
+    occurrence_data %>%
+    dplyr::mutate(across(any_of(essential_cols), as.numeric))
+
+
   # rename occurrences and speciesKey columns to be consistent with the other package functions (should maybe change this throughout package?)
   occurrence_data <-
     occurrence_data %>%
     dplyr::rename(obs = occurrences) %>%
     dplyr::rename(taxonKey = speciesKey)
 
-  # Remove NA values in cell code column
-  occurrence_data <-
-    occurrence_data %>%
-    dplyr::filter(!is.na(cellCode))
+  if (grid_type != "none") {
+
+    # Remove NA values in cell code column
+    occurrence_data <-
+      occurrence_data %>%
+      dplyr::filter(!is.na(cellCode))
+
+  }
 
   if (grid_type == "eea") {
 
@@ -399,34 +465,52 @@ process_cube <- function(cube_name,
 
   }
 
+  if(min(occurrence_data$year, na.rm = TRUE)==max(occurrence_data$year, na.rm = TRUE)) {
 
-  # Check whether start and end years are within dataset
-  first_year <- occurrence_data %>%
-    dplyr::select(year) %>%
-    min(na.rm = TRUE) %>%
-    ifelse(is.null(first_year),
-           .,
-           ifelse(first_year > ., first_year, .))
-  last_year <- occurrence_data %>%
-    dplyr::summarize(max_year = max(year, na.rm = TRUE)-1) %>%
-    dplyr::pull(max_year) %>%
-    ifelse(is.null(last_year),
-           .,
-           ifelse(last_year < ., last_year, .))
+    first_year <- min(occurrence_data$year)
+    last_year <- first_year
 
-  # Limit data set
-  occurrence_data <-
-    occurrence_data %>%
-    dplyr::filter(year >= first_year) %>%
-    dplyr::filter(year <= last_year)
+    warning("Cannot create trends with this dataset, as occurrences are all from the same year.")
+
+  } else {
+
+    # Check whether start and end years are within dataset
+    first_year <- occurrence_data %>%
+      dplyr::select(year) %>%
+      min(na.rm = TRUE) %>%
+      ifelse(is.null(first_year),
+             .,
+             ifelse(first_year > ., first_year, .))
+    last_year <- occurrence_data %>%
+     # dplyr::summarize(max_year = max(year, na.rm = TRUE)-1) %>%
+      dplyr::summarize(max_year = max(year, na.rm = TRUE)) %>%
+      dplyr::pull(max_year) %>%
+      ifelse(is.null(last_year),
+             .,
+             ifelse(last_year < ., last_year, .))
+
+    # Limit data set
+    occurrence_data <-
+      occurrence_data %>%
+      dplyr::filter(year >= first_year) %>%
+      dplyr::filter(year <= last_year)
+
+  }
 
   # Remove any duplicate rows
   occurrence_data <-
     occurrence_data %>%
     dplyr::distinct() %>%
     dplyr::arrange(year)
 
-  cube <- new_processed_cube(occurrence_data, grid_type)
+  if (grid_type == "none" | grid_type == "custom") {
+
+    cube <- new_sim_cube(occurrence_data, grid_type)
+
+  } else {
+
+    cube <- new_processed_cube(occurrence_data, grid_type)
+  }
 
 }
 
@@ -573,3 +657,4 @@ process_cube_old <- function(cube_name,
   cube <- new_processed_cube(merged_data, grid_type = "eea")
 
 }
+