Skip to content

Commit

Permalink
Merge pull request #25 from b-cubed-eu/update0.2.3
Browse files Browse the repository at this point in the history
Update0.2.3
  • Loading branch information
shawndove authored Sep 12, 2024
2 parents 6e76d1d + 8d3eeb2 commit d1c4711
Show file tree
Hide file tree
Showing 9 changed files with 390 additions and 157 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ Imports:
rnaturalearth,
sf,
stringr,
taxize,
tibble,
tidyr,
units
Suggests:
knitr,
rmarkdown,
rnaturalearthdata,
taxize,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Encoding: UTF-8
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ S3method(print,indicator_map)
S3method(print,indicator_ts)
S3method(print,processed_cube)
S3method(print,processed_cube_dsinfo)
S3method(print,sim_cube)
export("%>%")
export(ab_rarity_map)
export(ab_rarity_ts)
Expand Down
41 changes: 41 additions & 0 deletions R/constructor_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,47 @@ new_processed_cube <- function(x, grid_type) {
}
}

#' @title 'sim_cube' S3 Constructor
#'
#' @description This function constructs a 'sim_cube' S3 object, a specialized data
#' structure designed for analysis of simulated biodiversity data (e.g. from the gcube
#' package) that lacks spatial information. It validates the input data cube, calculates
#' essential summary information, and prepares the object for further use.
#'
#' @param x A tibble data cube containing simulated occurrence data.
#'
#' @return A 'gcube' S3 object containing:
#' * **Summary statistics:** First and last year of data, number of species, number of observations.
#' * **Original data:** The input data cube.
#'
#' @noRd
new_sim_cube <- function(x, grid_type) {
# check that x is a tibble and all necessary columns are present
stopifnot(tibble::is_tibble(x),
all(c("year",
"taxonKey",
"obs") %in% names(x)))
structure(list(first_year = min(x$year),
last_year = max(x$year),
coord_range = ifelse(("xcoord" %in% colnames(x) & "ycoord" %in% colnames(x)),
list("xmin" = min(x$xcoord),
"xmax" = max(x$xcoord),
"ymin" = min(x$ycoord),
"ymax" = max(x$ycoord)),
"Coordinates not provided"),
num_cells = ifelse("cellCode" %in% colnames(x), length(unique(x$cellCode)), "No cell codes provided"),
num_species = length(unique(x$taxonKey)),
num_obs = sum(x$obs),
kingdoms = ifelse("kingdom" %in% colnames(x), c(unique(x$kingdom)), "Data not present"),
num_families = ifelse("family" %in% colnames(x), length(unique(x$family)), "Data not present"),
grid_type = grid_type,
if ("resolution" %in% colnames(x)) {
resolutions = unique(x$resolution)
},
data = x),
class = "sim_cube")
}


#' @title 'indicator_ts' S3 Constructor
#'
Expand Down
6 changes: 6 additions & 0 deletions R/indicator_wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,9 @@ spec_range_ts <- function(data, ...) {
#'
#' @export
tax_distinct_map <- function(data, rows = 1, ...) {
if (!requireNamespace("taxize", quietly = TRUE)) {
stop("The package {taxize} is required for this action")
}
compute_indicator_workflow(data,
type = "tax_distinct",
dim_type = "map",
Expand Down Expand Up @@ -786,6 +789,9 @@ tax_distinct_map <- function(data, rows = 1, ...) {
#'
#' @export
tax_distinct_ts <- function(data, rows = 1, ...) {
if (!requireNamespace("taxize", quietly = TRUE)) {
stop("The package {taxize} is required for this action")
}
compute_indicator_workflow(data,
type = "tax_distinct",
dim_type = "ts",
Expand Down
35 changes: 34 additions & 1 deletion R/print_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ print.indicator_ts <- function(x, n = 10, ...) {
cat("Coordinate range represented:\n")
print(x$coord_range)
cat("\nNumber of species represented:", x$num_species, "\n")
if (!is.null(x$num_families)) {cat("Number of families represented:",
paste(x$num_families, collapse = ", "), "\n\n")}
if (!is.null(x$kingdoms)) {cat("Kingdoms represented:", x$kingdoms, "\n")}
if (!is.null(x$families)) {cat("Families represented:", x$families, "\n")}
# if (!is.null(x$families)) {cat("Families represented:", x$families, "\n")}
cat("\nFirst", n, "rows of data (use n = to show more):\n\n")
print(x$data, n = n, ...)
}
Expand Down Expand Up @@ -129,6 +131,37 @@ print.processed_cube_dsinfo <- function(x, n = 10, ...) {
print(x$data, n = n, ...)
}

#' @title Print a Simulated Data Cube Object
#'
#' @description Provides a summary representation of a sim_cube object,
#' designed for user-friendly display in the console.
#'
#' @method print sim_cube
#'
#' @param x A sim_cube object.
#' @param n Integer specifying the number of rows of cube data to display.
#' @param ... Additional arguments.
#'
#'
#' @export
print.sim_cube <- function(x, n = 10, ...) {
cat("\nSimulated data cube for calculating biodiversity indicators\n\n")
cat("Date Range:", x$first_year, "-", x$last_year, "\n")
if (!is.null(x$resolutions)) {
cat("Single-resolution cube with cell size", x$resolutions, "^2\n")
}
cat("Number of cells:", x$num_cells, "\n")
cat("Grid reference system:", x$grid_type, "\n")
cat("Coordinate range:\n")
print(unlist(x$coord_range))
cat("\nTotal number of observations:", x$num_obs, "\n")
cat("Number of species represented:", x$num_species, "\n")
cat("Number of families represented:", paste(x$num_families, collapse = ", "), "\n\n")
cat("Kingdoms represented:", paste(x$kingdoms, collapse = ", "), "\n\n")
cat("First", n, "rows of data (use n = to show more):\n\n")
print(x$data, n = n, ...)
}

#' @title Print Available Indicators
#' @description Provide a summary of indicators registered in the package.
#'
Expand Down
153 changes: 119 additions & 34 deletions R/process_cube_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
#' specified, uses the latest year present in the cube.
#' @param grid_type Specify which grid reference system your cube uses. By default
#' the function will attempt to determine this automatically and return an error if it fails.
#' If you want to perform analysis on a cube with custom grid codes (e.g. output
#' from the gcube package) or a cube without grid codes, select 'custom' or 'none',
#' respectively.
#' @param force_gridcode Force the function to assume a specific grid reference system.
#' This may cause unexpected downstream issues, so it is not recommended. If you are
#' getting errors related to grid cell codes, check to make sure they are valid.
Expand Down Expand Up @@ -80,7 +83,7 @@
#' }
#' @export
process_cube <- function(cube_name,
grid_type = c("automatic", "eea", "mgrs", "eqdgc"),
grid_type = c("automatic", "eea", "mgrs", "eqdgc", "custom", "none"),
first_year = NULL,
last_year = NULL,
force_gridcode = FALSE,
Expand All @@ -101,13 +104,28 @@ process_cube <- function(cube_name,
cols_sex = NULL,
cols_lifeStage = NULL) {

# Read in data cube
occurrence_data <- readr::read_delim(
file = cube_name,
delim = "\t",
na = "",
show_col_types = FALSE
)
# data_type = match.arg(data_type)

if (is.character(cube_name) & length(cube_name == 1)) {

# Read in data cube
occurrence_data <- readr::read_delim(
file = cube_name,
delim = "\t",
na = "",
show_col_types = FALSE
)

} else if (inherits(cube_name, "data.frame")) {

# Read in data cube
occurrence_data <- tibble::as_tibble(cube_name)

} else {

stop("`cube_name` should be a file path or dataframe.")

}

grid_type = match.arg(grid_type)

Expand Down Expand Up @@ -164,6 +182,35 @@ process_cube <- function(cube_name,

}

# if the user has chosen 'custom' as a grid type...
} else if (grid_type == "custom") {

# check if the user has provided a name for the column containing grid cell codes
if (is.null(cols_cellCode)) {

stop("You have chosen custom grid type. Please provide the name of the column containing grid cell codes.")

}


# check that the column name they provided exists
if (!cols_cellCode %in% names(occurrence_data)) {

stop("The column name you provided for grid cell codes does not exist. Please double check that you spelled it correctly.")

}

# rename it to the default
occurrence_data <-
occurrence_data %>%
dplyr::rename(cellCode = cols_cellCode)

# if the user has chosen 'none' as a grid type...
} else if (grid_type == "none") {

# create dummy column full of zeros
# occurrence_data$cellCode <- 0

# if the user has specified a grid type...
} else {

Expand All @@ -184,7 +231,7 @@ process_cube <- function(cube_name,

}

if (force_gridcode == FALSE) {
if (force_gridcode == FALSE & grid_type!="none") {

grid_type_test <- ifelse(grid_type == "eea", stringr::str_detect(occurrence_data[[cols_cellCode]], "^[0-9]{1,3}[km]{1,2}[EW]{1}[0-9]{2,7}[NS]{1}[0-9]{2,7}$"),
ifelse(grid_type == "mgrs", stringr::str_detect(occurrence_data[[cols_cellCode]], "^[0-9]{2}[A-Z]{3}[0-9]{0,10}$"),
Expand Down Expand Up @@ -249,8 +296,9 @@ process_cube <- function(cube_name,
col_names <- data.frame("default" = unlist(col_names_defaultlist), "user" = unlist(col_names_userlist))

# rename user-supplied column names to defaults expected by package functions
names(occurrence_data)[names(occurrence_data) %in% col_names[,2]] <-
col_names[,1][col_names[,2] %in% (names(occurrence_data))]
for (i in (which(names(occurrence_data) %in% col_names[,2]))) {
names(occurrence_data)[i] <- col_names[,1][which(col_names[,2] %in% names(occurrence_data)[i])]
}

# for (i in 1:length(col_names_userlist)) {
#
Expand Down Expand Up @@ -309,16 +357,34 @@ process_cube <- function(cube_name,

}

essential_cols <- c("year",
"occurrences",
"minCoordinateUncertaintyInMeters",
"minTemporalUncertainty",
"kingdomKey",
"familyKey",
"speciesKey",
"familyCount")
# make sure that essential columns are the correct type
occurrence_data <-
occurrence_data %>%
dplyr::mutate(across(any_of(essential_cols), as.numeric))


# rename occurrences and speciesKey columns to be consistent with the other package functions (should maybe change this throughout package?)
occurrence_data <-
occurrence_data %>%
dplyr::rename(obs = occurrences) %>%
dplyr::rename(taxonKey = speciesKey)

# Remove NA values in cell code column
occurrence_data <-
occurrence_data %>%
dplyr::filter(!is.na(cellCode))
if (grid_type != "none") {

# Remove NA values in cell code column
occurrence_data <-
occurrence_data %>%
dplyr::filter(!is.na(cellCode))

}

if (grid_type == "eea") {

Expand Down Expand Up @@ -399,34 +465,52 @@ process_cube <- function(cube_name,

}

if(min(occurrence_data$year, na.rm = TRUE)==max(occurrence_data$year, na.rm = TRUE)) {

# Check whether start and end years are within dataset
first_year <- occurrence_data %>%
dplyr::select(year) %>%
min(na.rm = TRUE) %>%
ifelse(is.null(first_year),
.,
ifelse(first_year > ., first_year, .))
last_year <- occurrence_data %>%
dplyr::summarize(max_year = max(year, na.rm = TRUE)-1) %>%
dplyr::pull(max_year) %>%
ifelse(is.null(last_year),
.,
ifelse(last_year < ., last_year, .))
first_year <- min(occurrence_data$year)
last_year <- first_year

# Limit data set
occurrence_data <-
occurrence_data %>%
dplyr::filter(year >= first_year) %>%
dplyr::filter(year <= last_year)
warning("Cannot create trends with this dataset, as occurrences are all from the same year.")

} else {

# Check whether start and end years are within dataset
first_year <- occurrence_data %>%
dplyr::select(year) %>%
min(na.rm = TRUE) %>%
ifelse(is.null(first_year),
.,
ifelse(first_year > ., first_year, .))
last_year <- occurrence_data %>%
# dplyr::summarize(max_year = max(year, na.rm = TRUE)-1) %>%
dplyr::summarize(max_year = max(year, na.rm = TRUE)) %>%
dplyr::pull(max_year) %>%
ifelse(is.null(last_year),
.,
ifelse(last_year < ., last_year, .))

# Limit data set
occurrence_data <-
occurrence_data %>%
dplyr::filter(year >= first_year) %>%
dplyr::filter(year <= last_year)

}

# Remove any duplicate rows
occurrence_data <-
occurrence_data %>%
dplyr::distinct() %>%
dplyr::arrange(year)

cube <- new_processed_cube(occurrence_data, grid_type)
if (grid_type == "none" | grid_type == "custom") {

cube <- new_sim_cube(occurrence_data, grid_type)

} else {

cube <- new_processed_cube(occurrence_data, grid_type)
}

}

Expand Down Expand Up @@ -573,3 +657,4 @@ process_cube_old <- function(cube_name,
cube <- new_processed_cube(merged_data, grid_type = "eea")

}

Loading

0 comments on commit d1c4711

Please sign in to comment.