Skip to content

Commit

Permalink
fix bug in sits_uncertainty_sampling
Browse files Browse the repository at this point in the history
  • Loading branch information
gilbertocamara committed Oct 20, 2024
1 parent 060ab58 commit 74488b7
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 87 deletions.
8 changes: 4 additions & 4 deletions R/api_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@
#' @param som_map kohonen_map
#' @return adjacency matrix with the distances btw neurons.
#'
.som_adjacency <- function(som_map) {
koh <- som_map$som_properties
adjacency <- as.matrix(proxy::dist(koh$codes$NDVI, method = "dtw"))
}
# .som_adjacency <- function(som_map) {
# koh <- som_map$som_properties
# adjacency <- proxy::as.matrix(proxy::dist(koh$codes$NDVI, method = "dtw"))
# }

#' @title Transform SOM map into sf object.
#' @name .som_to_sf
Expand Down
130 changes: 52 additions & 78 deletions R/sits_active_learning.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#'
#' @param uncert_cube An uncertainty cube.
#' See \code{\link[sits]{sits_uncertainty}}.
#' @param n Number of suggested points.
#' @param n Number of suggested points per tile
#' @param min_uncert Minimum uncertainty value to select a sample.
#' @param sampling_window Window size for collecting points (in pixels).
#' The minimum window size is 10.
Expand Down Expand Up @@ -75,7 +75,6 @@
#' }
#'
#' @export
#'
sits_uncertainty_sampling <- function(uncert_cube,
n = 100L,
min_uncert = 0.4,
Expand All @@ -90,55 +89,55 @@ sits_uncertainty_sampling <- function(uncert_cube,
.check_int_parameter(sampling_window, min = 10L)
.check_int_parameter(multicores, min = 1, max = 2048)
.check_int_parameter(memsize, min = 1, max = 16384)
# Get block size
block <- .raster_file_blocksize(.raster_open_rast(.tile_path(uncert_cube)))
# Overlapping pixels
overlap <- ceiling(sampling_window / 2) - 1
# Check minimum memory needed to process one block
job_memsize <- .jobs_memsize(
job_size = .block_size(block = block, overlap = overlap),
npaths = sampling_window,
nbytes = 8,
proc_bloat = .conf("processing_bloat_cpu")
)
# Update multicores parameter
multicores <- .jobs_max_multicores(
job_memsize = job_memsize,
memsize = memsize,
multicores = multicores
)
# Update block parameter
block <- .jobs_optimal_block(
job_memsize = job_memsize,
block = block,
image_size = .tile_size(.tile(uncert_cube)),
memsize = memsize,
multicores = multicores
)
# Prepare parallel processing
.parallel_start(workers = multicores)
on.exit(.parallel_stop(), add = TRUE)
# Slide on cube tiles
samples_tb <- slider::slide_dfr(uncert_cube, function(tile) {
# Create chunks as jobs
chunks <- .tile_chunks_create(
tile = tile,
overlap = overlap,
block = block
# open spatial raster object
rast <- .raster_open_rast(.tile_path(tile))
# get the values
values <- .raster_get_values(rast)
# sample the maximum values
samples_tile <- C_max_sampling(
x = values,
nrows = nrow(rast),
ncols = ncol(rast),
window_size = sampling_window
)
# Tile path
tile_path <- .tile_path(tile)
# Get a list of values of high uncertainty
# Process jobs in parallel
top_values <- .jobs_map_parallel_dfr(chunks, function(chunk) {
# Read and preprocess values
.raster_open_rast(tile_path) |>
.raster_get_top_values(
block = .block(chunk),
band = 1,
n = n,
sampling_window = sampling_window
# get the top most values
samples_tile <- samples_tile |>
# randomly shuffle the rows of the dataset
dplyr::sample_frac() |>
dplyr::slice_max(
.data[["value"]],
n = n,
with_ties = FALSE
)
# transform to tibble
tb <- rast |>
terra::xyFromCell(
cell = samples_tile[["cell"]]
) |>
tibble::as_tibble()
# find NA
na_rows <- which(is.na(tb))
# remove NA
if (length(na_rows) > 0) {
tb <- tb[-na_rows, ]
samples_tile <- samples_tile[-na_rows, ]
}
# Get the values' positions.
result_tile <- tb |>
sf::st_as_sf(
coords = c("x", "y"),
crs = .raster_crs(rast),
dim = "XY",
remove = TRUE
) |>
sf::st_transform(crs = "EPSG:4326") |>
sf::st_coordinates()

colnames(result_tile) <- c("longitude", "latitude")
result_tile <- result_tile |>
dplyr::bind_cols(samples_tile) |>
dplyr::mutate(
value = .data[["value"]] *
.conf("probs_cube_scale_factor")
Expand All @@ -150,40 +149,15 @@ sits_uncertainty_sampling <- function(uncert_cube,
c("longitude", "latitude", "value")
)) |>
tibble::as_tibble()
})
# All the cube's uncertainty images have the same start & end dates.
top_values[["start_date"]] <- .tile_start_date(tile)
top_values[["end_date"]] <- .tile_end_date(tile)
top_values[["label"]] <- "NoClass"

return(top_values)
# All the cube's uncertainty images have the same start & end dates.
result_tile[["start_date"]] <- .tile_start_date(uncert_cube)
result_tile[["end_date"]] <- .tile_end_date(uncert_cube)
result_tile[["label"]] <- "NoClass"
return(result_tile)
})

# Slice result samples
result_tb <- samples_tb |>
dplyr::slice_max(
order_by = .data[["value"]], n = n,
with_ties = FALSE
) |>
dplyr::transmute(
longitude = .data[["longitude"]],
latitude = .data[["latitude"]],
start_date = .data[["start_date"]],
end_date = .data[["end_date"]],
label = .data[["label"]],
uncertainty = .data[["value"]]
)

# Warn if it cannot suggest all required samples
if (nrow(result_tb) < n) {
warning(.conf("messages", "sits_uncertainty_sampling_window"),
call. = FALSE)
}

class(result_tb) <- c("sits_uncertainty", "sits", class(result_tb))
return(result_tb)
return(samples_tb)
}

#' @title Suggest high confidence samples to increase the training set.
#'
#' @name sits_confidence_sampling
Expand Down
8 changes: 7 additions & 1 deletion R/sits_cube.R
Original file line number Diff line number Diff line change
Expand Up @@ -310,12 +310,18 @@
#' )
#' )
#' # --- Create a cube based on a local MODIS data
#' # MODIS local files have names such as
#' # "TERRA_MODIS_012010_NDVI_2013-09-14.jp2"
#' # see the parse info parameter as an example on how to
#' # decode local files
#' data_dir <- system.file("extdata/raster/mod13q1", package = "sits")
#' modis_cube <- sits_cube(
#' source = "BDC",
#' collection = "MOD13Q1-6.1",
#' data_dir = data_dir
#' data_dir = data_dir,
#' parse_info = c("satellite", "sensor", "tile", "band", "date")
#' )
#'
#' }
#' @export
sits_cube <- function(source, collection, ...) {
Expand Down
2 changes: 1 addition & 1 deletion R/sits_uncertainty.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' providing information about the confidence of the model.
#' The supported types of uncertainty are 'entropy', 'least', and 'margin'.
#' 'entropy' is the difference between all predictions expressed as
#' entropy, 'least' is the difference between 100% and most confident
#' entropy, 'least' is the difference between 1.0 and most confident
#' prediction, and 'margin' is the difference between the two most confident
#' predictions.
#'
Expand Down
2 changes: 2 additions & 0 deletions inst/extdata/config_internals.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ sits_results_s3_class:
probs-vector: "probs_vector_cube"
bayes: "probs_cube"
uncert: "uncertainty_cube"
margin: "uncertainty_cube"
least: "uncertainty_cube"
entropy: "uncertainty_cube"
variance: "variance_cube"
class: "class_cube"
Expand Down
8 changes: 7 additions & 1 deletion man/sits_cube.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/sits_uncertainty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/sits_uncertainty_sampling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 74488b7

Please sign in to comment.