Skip to content

Commit

Permalink
Merge pull request #1181 from M3nin0/fix/segs-base-classify
Browse files Browse the repository at this point in the history
  • Loading branch information
gilbertocamara authored Jul 26, 2024
2 parents 434d9c5 + 8aae5e2 commit ea05a26
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 63 deletions.
6 changes: 6 additions & 0 deletions R/api_classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@
#' in the classified images for each corresponding year.
#'
#' @param tile Single tile of a data cube.
#' @param bands Bands to extract time series
#' @param base_bands Base bands to extract values
#' @param ml_model Model trained by \code{\link[sits]{sits_train}}.
#' @param block Optimized block to be read into memory.
#' @param roi Region of interest.
Expand All @@ -241,6 +243,8 @@
#' @param progress Show progress bar?
#' @return List of the classified raster layers.
.classify_vector_tile <- function(tile,
bands,
base_bands,
ml_model,
block,
roi,
Expand Down Expand Up @@ -322,6 +326,8 @@
# Extract segments time series
segments_ts <- .segments_poly_read(
tile = tile,
bands = bands,
base_bands = base_bands,
chunk = chunk,
n_sam_pol = n_sam_pol,
impute_fn = impute_fn
Expand Down
137 changes: 78 additions & 59 deletions R/api_segments.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
y = data,
by = c(pol_id = "polygon_id")
) |>
dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]]))
dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]]))
}
#'
#' @name .segments_data_read
Expand All @@ -273,78 +273,45 @@
#' @description Using the segments as polygons, get all time series
#'
#' @param tile tile of regular data cube
#' @param bands Bands to extract time series
#' @param base_bands Base bands to extract values
#' @param chunk A chunk to be read.
#' @param n_sam_pol Number of samples per polygon to be read.
#' @param impute_fn Imputation function to remove NA
#'
#' @return samples associated to segments
.segments_poly_read <- function(tile, chunk, n_sam_pol, impute_fn) {
.segments_poly_read <- function(
tile, bands, base_bands, chunk, n_sam_pol, impute_fn
) {
# define bands variables
ts_bands <- NULL
ts_bands_base <- NULL
# For cubes that have a time limit to expire (MPC cubes only)
tile <- .cube_token_generator(cube = tile)
# Read and preprocess values of cloud
# Get tile bands
tile_bands <- .tile_bands(
tile = tile,
add_cloud = FALSE
)
# Read and preprocess values of each band
ts_bands <- purrr::map(tile_bands, function(band) {
ts_bands <- purrr::map(bands, function(band) {
# extract band values
values <- .tile_extract_segments(
.tile_read_segments(
tile = tile,
band = band,
chunk = chunk
)
pol_id <- values[, "pol_id"]
values <- values[, -1:0]
# Correct missing, minimum, and maximum values and
# apply scale and offset.
band_conf <- .tile_band_conf(
tile = tile,
band = band
chunk = chunk,
impute_fn = impute_fn
)
miss_value <- .miss_value(band_conf)
if (.has(miss_value)) {
values[values == miss_value] <- NA
}
min_value <- .min_value(band_conf)
if (.has(min_value)) {
values[values < min_value] <- NA
}
max_value <- .max_value(band_conf)
if (.has(max_value)) {
values[values > max_value] <- NA
}
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values * scale
}
offset <- .offset(band_conf)
if (.has(offset) && offset != 0) {
values <- values + offset
}
# are there NA values? interpolate them
if (anyNA(values)) {
values <- impute_fn(values)
}
# Returning extracted time series
return(list(pol_id, c(t(unname(values)))))
})
# extract the pol_id information from the first element of the list
pol_id <- ts_bands[[1]][[1]]
# remove the first element of the each list and retain the second
ts_bands <- purrr::map(ts_bands, function(ts_band) ts_band[[2]])
# rename the resulting list
names(ts_bands) <- tile_bands
names(ts_bands) <- bands
# transform the list to a tibble
ts_bands <- tibble::as_tibble(ts_bands)
# retrieve the dates of the tile
n_dates <- length(.tile_timeline(tile))
# find how many samples have been extracted from the tile
n_samples <- nrow(ts_bands) / n_dates
# include sample_id information
ts_bands[["sample_id"]] <- rep(seq_len(n_samples),
each = n_dates)
ts_bands[["sample_id"]] <- rep(seq_len(n_samples), each = n_dates)
# include timeline
ts_bands[["Index"]] <- rep(
.tile_timeline(tile),
Expand All @@ -353,23 +320,70 @@
# nest the values by bands
ts_bands <- tidyr::nest(
ts_bands,
time_series = c("Index", dplyr::all_of(tile_bands))
time_series = c("Index", dplyr::all_of(bands))
)
# if `base_bands` is available, transform it to the same structure as
# `time_series`
if (.has(base_bands)) {
# read base data values
ts_bands_base <- purrr::map(base_bands, function(band) {
.tile_read_segments(
tile = .tile_base_info(tile),
band = band,
chunk = chunk,
impute_fn = impute_fn
)
})
# remove polygon ids
ts_bands_base <- purrr::map(ts_bands_base,
function(ts_band) ts_band[[2]])
# name band values
names(ts_bands_base) <- base_bands
# merge band values
ts_bands_base <- dplyr::bind_cols(ts_bands_base)
# include time reference in the data
ts_bands_base[["Index"]] <- rep(
.tile_timeline(.tile_base_info(tile)),
times = n_samples
)
# include base bands data
ts_bands <- tibble::add_column(ts_bands, ts_bands_base)
# nest base data
ts_bands <- tidyr::nest(
ts_bands,
base_data = c("Index", dplyr::all_of(base_bands))
)
}
# include the ids of the polygons
ts_bands[["polygon_id"]] <- pol_id
# we do the unnest again because we do not know the polygon id index
ts_bands <- tidyr::unnest(ts_bands, "time_series")
# remove pixels where all timeline was NA
ts_bands <- tidyr::drop_na(ts_bands)
# nest the values by bands
ts_bands <- tidyr::nest(
ts_bands,
time_series = c("Index", dplyr::all_of(tile_bands))
)
# define which columns must be checked to drop na values
drop_na_colums <- list("time_series" = bands)
# if `base_bands` is available, to `base_data` column is used
if (.has(base_bands)) {
drop_na_colums[["base_data"]] <- base_bands
}
# drop na values
for (colname in names(drop_na_colums)) {
# we do the unnest again because we do not know the polygon id index
ts_bands <- tidyr::unnest(ts_bands, colname)
# remove pixels where all timeline was NA
ts_bands <- tidyr::drop_na(ts_bands)
# nest the values by bands
ts_bands <- tidyr::nest(
ts_bands,
!!colname := c("Index", dplyr::all_of(drop_na_colums[[colname]]))
)
}
# define columns used in the points nest
points_nest <- c("sample_id", "time_series")
# if `base_bands` is available, include it in the nest operation
if (.has(base_bands)) {
points_nest <- c(points_nest, "base_data")
}
# nest the values by sample_id and time_series
ts_bands <- tidyr::nest(
ts_bands,
points = c("sample_id", "time_series")
points = points_nest
)
# retrieve the segments
segments <- .vector_read_vec(chunk[["segments"]][[1]])
Expand Down Expand Up @@ -404,5 +418,10 @@
samples <- .discard(samples, "sample_id")
# set sits class
class(samples) <- c("sits", class(samples))
# define `sits_base` if applicable
if (.has(base_bands)) {
class(samples) <- c("sits_base", class(samples))
}
# return!
return(samples)
}
65 changes: 61 additions & 4 deletions R/api_tile.R
Original file line number Diff line number Diff line change
Expand Up @@ -1443,11 +1443,11 @@ NULL
#' @keywords internal
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Given a data cube, retrieve the time series of XY locations
#' @description Given a tile and a band, return a set of values for segments
#'
#' @param tile ... TODO: document
#' @param band ...
#' @param chunk ...
#' @param tile Metadata about a data cube (one tile)
#' @param band Name of the band to the retrieved
#' @param chunk Chunk from where segments data will be extracted
#'
#' @return Data.frame with values per polygon.
.tile_extract_segments <- function(tile, band, chunk) {
Expand All @@ -1472,6 +1472,63 @@ NULL
# Return values
return(as.matrix(values))
}
#' @title Given a tile and a band, return a set of values for segments ready to
#' be used
#' @name .tile_extract_segments
#' @noRd
#' @keywords internal
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Given a tile and a band, return a set of values for segments
#' ready to be used (e.g., scale transformation, offset, and so on).
#'
#' @param tile Metadata about a data cube (one tile)
#' @param band Name of the band to the retrieved
#' @param chunk Chunk from where segments data will be extracted
#' @param impute_fn Imputation function to remove NA
#'
#' @return Data.frame with values per polygon.
.tile_read_segments <- function(tile, band, chunk, impute_fn) {
values <- .tile_extract_segments(
tile = tile,
band = band,
chunk = chunk
)
pol_id <- values[, "pol_id"]
values <- values[, -1:0]
# Correct missing, minimum, and maximum values and
# apply scale and offset.
band_conf <- .tile_band_conf(
tile = tile,
band = band
)
miss_value <- .miss_value(band_conf)
if (.has(miss_value)) {
values[values == miss_value] <- NA
}
min_value <- .min_value(band_conf)
if (.has(min_value)) {
values[values < min_value] <- NA
}
max_value <- .max_value(band_conf)
if (.has(max_value)) {
values[values > max_value] <- NA
}
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values * scale
}
offset <- .offset(band_conf)
if (.has(offset) && offset != 0) {
values <- values + offset
}
# are there NA values? interpolate them
if (anyNA(values)) {
values <- impute_fn(values)
}
# Returning extracted time series
return(list(pol_id, c(t(unname(values)))))
}
#' @title Check if tile contains cloud band
#' @keywords internal
#' @noRd
Expand Down
12 changes: 12 additions & 0 deletions R/sits_classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,16 @@ sits_classify.segs_cube <- function(data,
}
if (.has(filter_fn))
.check_filter_fn(filter_fn)
# By default, base bands is null.
base_bands <- NULL
if (.cube_is_base(data)) {
# Get base bands
base_bands <- intersect(
.ml_bands(ml_model), .cube_bands(.cube_base_info(data))
)
}
# get non-base bands
bands <- setdiff(.ml_bands(ml_model), base_bands)
# Check memory and multicores
# Get block size
block <- .raster_file_blocksize(.raster_open_rast(.tile_path(data)))
Expand Down Expand Up @@ -471,6 +481,8 @@ sits_classify.segs_cube <- function(data,
# Classify all the segments for each tile
class_vector <- .classify_vector_tile(
tile = tile,
bands = bands,
base_bands = base_bands,
ml_model = ml_model,
block = block,
roi = roi,
Expand Down

0 comments on commit ea05a26

Please sign in to comment.