From 9b1d8fdc12c66611e73481f3e7b6f8f7c0fd0bbd Mon Sep 17 00:00:00 2001 From: Mauricio 'Pacha' Vargas Sepulveda Date: Thu, 19 Sep 2024 15:14:11 -0400 Subject: [PATCH] fully implement #56 and add Munk to DESCRIPTION --- DESCRIPTION | 4 +++- R/locate_area.R | 4 +++- vignettes/tabulapdf.Rmd | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 95d6a01..432802b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,9 @@ Authors@R: c( comment = "rOpenSci reviewer"), person("Lincoln", "Mullen", role = "ctb", - comment = "rOpenSci reviewer") + comment = "rOpenSci reviewer"), + person("Munk School of Global Affairs and Public Policy", + role = "fnd") ) License: Apache License (>= 2) URL: https://docs.ropensci.org/tabulapdf/ (website) diff --git a/R/locate_area.R b/R/locate_area.R index f97c71e..f30c70d 100644 --- a/R/locate_area.R +++ b/R/locate_area.R @@ -61,12 +61,13 @@ locate_areas <- function(file, file <- localize_file(file, copy = copy) # on.exit(unlink(file), add = TRUE) dims <- get_page_dims(file, pages = pages) + if (!is.null(thumbnails)) { filelist <- list.files(path.expand(thumbnails), pattern = "\\.png$", ignore.case = TRUE, full.names = TRUE) file.copy(filelist, tempdir(), overwrite = T) paths <- file.path(tempdir(), basename(filelist)) cat("fetching files") - } else if (is.null(pages)) { + } else { paths <- make_thumbnails(file, outdir = tempdir(), pages = pages, @@ -127,6 +128,7 @@ extract_areas <- function(file, extract_tables( file = file, pages = pages, + thumbnails = NULL, area = areas, guess = guess, ... diff --git a/vignettes/tabulapdf.Rmd b/vignettes/tabulapdf.Rmd index da03b9a..71dabe5 100644 --- a/vignettes/tabulapdf.Rmd +++ b/vignettes/tabulapdf.Rmd @@ -147,8 +147,8 @@ The selection from Figure 1 can be used to extract the tables as follows: ```{r} # manual selection, result transcribed below -# first_table <- locate_areas(f, pages = 2) -# second_table <- locate_areas(f, pages = 2) +# first_table <- locate_areas(f, pages = 2)[[1]] +# second_table <- locate_areas(f, pages = 2)[[1]] first_table <- c(58.15032, 125.26869, 182.02355, 488.12966) second_table <- c(387.7791, 125.2687, 513.7519, 492.3246)