Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Jun 15, 2023
2 parents ee1ded8 + 983a036 commit ad1bfc0
Show file tree
Hide file tree
Showing 12 changed files with 291 additions and 109 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: RcppCWB
Type: Package
Title: 'Rcpp' Bindings for the 'Corpus Workbench' ('CWB')
Version: 0.6.0
Date: 2023-03-21
Version: 0.6.1
Date: 2023-06-13
Author: Andreas Blaette [aut, cre],
Bernard Desgraupes [aut],
Sylvain Loiseau [aut],
Expand Down
14 changes: 13 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# RcppCWB 0.5.5.9001-.9003
# RcppCWB 0.6.0.9001ff

* New function `region_to_strucs()` to get minimum and maximum struc of
s-attribute within region provided. Works also for nested s-attributes.
* New function `region_matrix_to_struc_matrix()`.
* Functions `cl_cpos2lbound()` and `cl_cpos2rbound()` return NA if corpus
position is outside a struc for the given s-attribute. #78.
* Functions `cl_cpos2lbound()` and `cl_cpos2rbound()` are exposed directly from
C++ without R wrappers, improving performance. If argument `registry` is not
provided, the environment variable 'CORPUS_REGISTRY' is now used implicitly.


# RcppCWB 0.6.0

* Rcpp wrappers for Corpus Library (CL) functions are exposed directly and
can be used in C++ functions imported using `Rcpp::sourceCpp()` or
Expand Down
39 changes: 35 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,33 @@
.Call(`_RcppCWB_region_matrix_context`, corpus, registry, region_matrix, p_attribute, s_attribute, boundary, left, right)
}

#' Get min and max strucs of s-attribute present in region
#'
#' Look up the minimum and maximum struc of a s-attribute within a region,
#' including scenario of nested s-attributes. If there are no regions of the
#' s-attribute within the region, `NA` values are returned.
#'
#'
#' @param corpus ID of a CWB corpus.
#' @param registry Path of the registry directory. If `NULL` (default), value
#' of environment variable 'CORPUS_REGISTRY' will be used.
#' @param s_attribute Name of structural attribute. The attribute may be
#' nested.
#' @param region Vector with left and right corpus position of region.
#' @return Depending on whether input is a vector (argument `region`) or a matrix
#' (argument `region_matrix`), a vector or a matrix.
#' @param region_matrix A two-column `matrix` with regions, left corpus
#' positions in column 1, right corpus positions in column 2.
#' @rdname regions_to_strucs
region_matrix_to_struc_matrix <- function(corpus, s_attribute, region_matrix, registry = NULL) {
  # Forward all arguments unchanged to the registered native routine and
  # hand back whatever it returns.
  struc_matrix <- .Call(
    `_RcppCWB_region_matrix_to_struc_matrix`,
    corpus, s_attribute, region_matrix, registry
  )
  struc_matrix
}

#' @rdname regions_to_strucs
region_to_strucs <- function(corpus, s_attribute, region, registry = NULL) {
  # Forward all arguments unchanged to the registered native routine and
  # hand back whatever it returns.
  strucs <- .Call(
    `_RcppCWB_region_to_strucs`,
    corpus, s_attribute, region, registry
  )
  strucs
}

.cwb_version <- function() {
  # No arguments: simply return the result of the registered native routine.
  version <- .Call(`_RcppCWB_cwb_version`)
  version
}
Expand Down Expand Up @@ -213,8 +240,9 @@ id_to_cpos <- function(p_attr, id) {
.Call(`_RcppCWB_id_to_cpos`, p_attr, id)
}

.cl_cpos2lbound <- function(corpus, s_attribute, cpos, registry) {
.Call(`_RcppCWB__cl_cpos2lbound`, corpus, s_attribute, cpos, registry)
#' @rdname s_attributes
cl_cpos2lbound <- function(corpus, s_attribute, cpos, registry = NULL) {
.Call(`_RcppCWB_cl_cpos2lbound`, corpus, s_attribute, cpos, registry)
}

#' @rdname cl_rework
Expand All @@ -223,8 +251,11 @@ cpos_to_lbound <- function(s_attr, cpos) {
.Call(`_RcppCWB_cpos_to_lbound`, s_attr, cpos)
}

.cl_cpos2rbound <- function(corpus, s_attribute, cpos, registry) {
.Call(`_RcppCWB__cl_cpos2rbound`, corpus, s_attribute, cpos, registry)
#' @rdname s_attributes
#' @details `cl_cpos2rbound()` and `cl_cpos2lbound()` return `NA` for values of
#' cpos that are outside a struc for the structural attribute given.
cl_cpos2rbound <- function(corpus, s_attribute, cpos, registry = NULL) {
.Call(`_RcppCWB_cl_cpos2rbound`, corpus, s_attribute, cpos, registry)
}

#' @rdname cl_rework
Expand Down
33 changes: 3 additions & 30 deletions R/cl.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,34 +171,6 @@ cl_struc2str <- function(corpus, s_attribute, struc, registry = Sys.getenv("CORP
.cl_struc2str(corpus = corpus, s_attribute = s_attribute, struc = struc, registry = registry)
}

#' @rdname s_attributes
cl_cpos2lbound <- function(corpus, s_attribute, cpos, registry = Sys.getenv("CORPUS_REGISTRY")) {
  # Validate registry, corpus and s-attribute first; each check is expected to
  # signal an error itself if its input is not acceptable.
  check_registry(registry)
  check_corpus(corpus, registry, cqp = FALSE)
  check_s_attribute(corpus = corpus, registry = registry, s_attribute = s_attribute)

  if (length(cpos) > 0L) {
    # Only non-empty input is validated and passed on to the low-level call.
    check_cpos(corpus = corpus, p_attribute = "word", cpos = cpos, registry = registry)
    .cl_cpos2lbound(corpus = corpus, s_attribute = s_attribute, cpos = cpos, registry = registry)
  } else {
    # Nothing to look up: empty input yields an empty integer vector.
    integer()
  }
}

#' @rdname s_attributes
cl_cpos2rbound <- function(corpus, s_attribute, cpos, registry = Sys.getenv("CORPUS_REGISTRY")) {
  # Validate registry, corpus and s-attribute, then pass `cpos` on to the
  # low-level `.cl_cpos2rbound()` and return its result.
  #
  # corpus       ID of the corpus.
  # s_attribute  Name of the structural attribute.
  # cpos         Corpus positions to look up; may be empty.
  # registry     Registry directory, defaults to the 'CORPUS_REGISTRY'
  #              environment variable.
  #
  # Returns `integer()` for empty `cpos`, otherwise the value returned by
  # `.cl_cpos2rbound()`.
  check_registry(registry)
  check_corpus(corpus, registry, cqp = FALSE)
  check_s_attribute(corpus = corpus, registry = registry, s_attribute = s_attribute)

  # Short-circuit on empty input *before* validating corpus positions, in the
  # same order as cl_cpos2lbound(): there is nothing to validate, and the
  # position check should not be run on a zero-length vector.
  if (length(cpos) == 0L) {
    return(integer())
  }
  check_cpos(corpus = corpus, p_attribute = "word", cpos = cpos, registry = registry)

  .cl_cpos2rbound(corpus = corpus, s_attribute = s_attribute, cpos = cpos, registry = registry)
}






#' @title Using Positional Attributes.
#'
Expand Down Expand Up @@ -401,8 +373,7 @@ cl_charset_name <- function(corpus, registry = Sys.getenv("CORPUS_REGISTRY")){
cl_struc_values <- function(corpus, s_attribute, registry = Sys.getenv("CORPUS_REGISTRY")){
check_corpus(corpus = corpus, registry = registry, cqp = FALSE)
registry <- normalizePath(path.expand(registry))
i <- .cl_struc_values(corpus = corpus, s_attribute = s_attribute, registry = registry)
if (i == 1L) TRUE else if (i == 0L) FALSE else if (i < 0L) as.integer(NA)
.cl_struc_values(corpus = corpus, s_attribute = s_attribute, registry = registry)
}

#' Get information from registry file
Expand Down Expand Up @@ -571,6 +542,7 @@ cl_list_corpora <- function(){
#' @name cl_rework
#' @rdname cl_rework
#' @examples
#' \donttest{
#' library(Rcpp)
#'
#' cppFunction(
Expand All @@ -590,6 +562,7 @@ cl_list_corpora <- function(){
#' )
#'
#' result <- get_str("REUTERS", "word", RcppCWB::get_tmp_registry(), 0:50)
#' }
NULL


23 changes: 6 additions & 17 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,10 @@
## General remarks

- This release replaces dependency PCRE with PCRE2 in the CWB source code
included in this package. This anticipates that build machines will gradually
no longer have PCRE installed. Fedora will presumably be first.
- This release extends auxiliary Rcpp functionality and fixes minor bugs. No
changes of fundamentals.

- Rcpp wrappers for CWB functionality are now exported such that it is possible
to write Rcpp inline C++ functions using this functionality.

- This is the first package version that includes a vignette (explaining how to
write inline C++ functions).

- The cleanup script is extended and reverts modifications that had previously
gone unnoticed.

- A failure to build Windows binaries for R-oldrel is addressed: Repository
https://github.com/PolMine/libcl that is used to get static libraries has been
updated.
- An example using `cppFunction()` ran 10 secs on winbuilder, so I wrapped this
into \donttest{}.

Previous aspects I repeat:

Expand All @@ -32,8 +21,8 @@ change.
## Test environments

* CI checks with GitHub Actions (Windows/macOS/Ubuntu)
* R winbuilder (R 4.2 release, devel, oldrel)
* local macOS, R 4.2.2 (arm64)
* R winbuilder (R 4.3.0 release, devel, oldrel)
* local macOS, R 4.3.0 (arm64)


## R CMD check results
Expand Down
28 changes: 14 additions & 14 deletions inst/include/RcppCWB_RcppExports.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,17 +612,17 @@ namespace RcppCWB {
return Rcpp::as<Rcpp::IntegerVector >(rcpp_result_gen);
}

inline Rcpp::IntegerVector _cl_cpos2lbound(SEXP corpus, SEXP s_attribute, Rcpp::IntegerVector cpos, SEXP registry) {
typedef SEXP(*Ptr__cl_cpos2lbound)(SEXP,SEXP,SEXP,SEXP);
static Ptr__cl_cpos2lbound p__cl_cpos2lbound = NULL;
if (p__cl_cpos2lbound == NULL) {
validateSignature("Rcpp::IntegerVector(*_cl_cpos2lbound)(SEXP,SEXP,Rcpp::IntegerVector,SEXP)");
p__cl_cpos2lbound = (Ptr__cl_cpos2lbound)R_GetCCallable("RcppCWB", "_RcppCWB__cl_cpos2lbound");
inline Rcpp::IntegerVector cl_cpos2lbound(SEXP corpus, SEXP s_attribute, Rcpp::IntegerVector cpos, SEXP registry = R_NilValue) {
typedef SEXP(*Ptr_cl_cpos2lbound)(SEXP,SEXP,SEXP,SEXP);
static Ptr_cl_cpos2lbound p_cl_cpos2lbound = NULL;
if (p_cl_cpos2lbound == NULL) {
validateSignature("Rcpp::IntegerVector(*cl_cpos2lbound)(SEXP,SEXP,Rcpp::IntegerVector,SEXP)");
p_cl_cpos2lbound = (Ptr_cl_cpos2lbound)R_GetCCallable("RcppCWB", "_RcppCWB_cl_cpos2lbound");
}
RObject rcpp_result_gen;
{
RNGScope RCPP_rngScope_gen;
rcpp_result_gen = p__cl_cpos2lbound(Shield<SEXP>(Rcpp::wrap(corpus)), Shield<SEXP>(Rcpp::wrap(s_attribute)), Shield<SEXP>(Rcpp::wrap(cpos)), Shield<SEXP>(Rcpp::wrap(registry)));
rcpp_result_gen = p_cl_cpos2lbound(Shield<SEXP>(Rcpp::wrap(corpus)), Shield<SEXP>(Rcpp::wrap(s_attribute)), Shield<SEXP>(Rcpp::wrap(cpos)), Shield<SEXP>(Rcpp::wrap(registry)));
}
if (rcpp_result_gen.inherits("interrupted-error"))
throw Rcpp::internal::InterruptedException();
Expand Down Expand Up @@ -654,17 +654,17 @@ namespace RcppCWB {
return Rcpp::as<Rcpp::IntegerVector >(rcpp_result_gen);
}

inline Rcpp::IntegerVector _cl_cpos2rbound(SEXP corpus, SEXP s_attribute, Rcpp::IntegerVector cpos, SEXP registry) {
typedef SEXP(*Ptr__cl_cpos2rbound)(SEXP,SEXP,SEXP,SEXP);
static Ptr__cl_cpos2rbound p__cl_cpos2rbound = NULL;
if (p__cl_cpos2rbound == NULL) {
validateSignature("Rcpp::IntegerVector(*_cl_cpos2rbound)(SEXP,SEXP,Rcpp::IntegerVector,SEXP)");
p__cl_cpos2rbound = (Ptr__cl_cpos2rbound)R_GetCCallable("RcppCWB", "_RcppCWB__cl_cpos2rbound");
inline Rcpp::IntegerVector cl_cpos2rbound(SEXP corpus, SEXP s_attribute, Rcpp::IntegerVector cpos, SEXP registry = R_NilValue) {
typedef SEXP(*Ptr_cl_cpos2rbound)(SEXP,SEXP,SEXP,SEXP);
static Ptr_cl_cpos2rbound p_cl_cpos2rbound = NULL;
if (p_cl_cpos2rbound == NULL) {
validateSignature("Rcpp::IntegerVector(*cl_cpos2rbound)(SEXP,SEXP,Rcpp::IntegerVector,SEXP)");
p_cl_cpos2rbound = (Ptr_cl_cpos2rbound)R_GetCCallable("RcppCWB", "_RcppCWB_cl_cpos2rbound");
}
RObject rcpp_result_gen;
{
RNGScope RCPP_rngScope_gen;
rcpp_result_gen = p__cl_cpos2rbound(Shield<SEXP>(Rcpp::wrap(corpus)), Shield<SEXP>(Rcpp::wrap(s_attribute)), Shield<SEXP>(Rcpp::wrap(cpos)), Shield<SEXP>(Rcpp::wrap(registry)));
rcpp_result_gen = p_cl_cpos2rbound(Shield<SEXP>(Rcpp::wrap(corpus)), Shield<SEXP>(Rcpp::wrap(s_attribute)), Shield<SEXP>(Rcpp::wrap(cpos)), Shield<SEXP>(Rcpp::wrap(registry)));
}
if (rcpp_result_gen.inherits("interrupted-error"))
throw Rcpp::internal::InterruptedException();
Expand Down
2 changes: 2 additions & 0 deletions man/cl_rework.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/regions_to_strucs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 7 additions & 13 deletions man/s_attributes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ad1bfc0

Please sign in to comment.