Skip to content

Commit

Permalink
Add function rb_reg_onig_match
Browse files Browse the repository at this point in the history
rb_reg_onig_match performs preparation, error handling, and cleanup for
matching a regex against a string. This reduces repetitive code and
removes the need for StringScanner to access internal data of regex.
  • Loading branch information
peterzhu2118 committed Jul 27, 2023
1 parent e5effa4 commit 7193b40
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 191 deletions.
1 change: 1 addition & 0 deletions ext/strscan/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
if RUBY_ENGINE == 'ruby'
$INCFLAGS << " -I$(top_srcdir)" if $extmk
have_func("onig_region_memsize", "ruby.h")
have_func("rb_reg_onig_match", "ruby.h")
create_makefile 'strscan'
else
File.write('Makefile', dummy_makefile("").join)
Expand Down
107 changes: 68 additions & 39 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position)
}
}

/* rb_reg_onig_match is available in Ruby 3.3 and later. */
#ifndef HAVE_RB_REG_ONIG_MATCH
/*
 * Fallback implementation used when the running Ruby does not provide
 * rb_reg_onig_match.
 *
 * Prepares `re` for use against `str`, invokes the `match` callback with the
 * prepared regex, and then releases or installs the prepared regex:
 *
 *  - When rb_reg_prepare_re hands back the regex already stored in `re`,
 *    the use count of `re` is bumped for the duration of the callback.
 *  - When it hands back a different regex, that regex is freed afterwards
 *    if `re` is still in use (non-zero use count); otherwise the regex
 *    stored in `re` is freed and replaced by the newly prepared one.
 *
 * re    - the Regexp object.
 * str   - the String object the regex is prepared for; forwarded to `match`.
 * match - callback performing the actual match.
 * args  - opaque pointer forwarded to `match`.
 * regs  - registers forwarded to `match`.
 *
 * Returns whatever `match` returned when that value is non-negative or
 * ONIG_MISMATCH.  Any other negative value raises ScanError with the
 * message "regexp buffer overflow".
 */
static OnigPosition
rb_reg_onig_match(VALUE re, VALUE str,
                  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
                  void *args, struct re_registers *regs)
{
    regex_t *prepared = rb_reg_prepare_re(re, str);
    const bool is_temporary = (prepared != RREGEXP_PTR(re));
    OnigPosition pos;

    if (is_temporary) {
        pos = match(prepared, str, regs, args);

        if (RREGEXP(re)->usecnt != 0) {
            /* The stored regex is still in use: discard the temporary one. */
            onig_free(prepared);
        }
        else {
            /* Nobody is using the stored regex: swap in the prepared one. */
            onig_free(RREGEXP_PTR(re));
            RREGEXP_PTR(re) = prepared;
        }
    }
    else {
        /* Mark the stored regex as in use while the callback runs. */
        RREGEXP(re)->usecnt++;
        pos = match(prepared, str, regs, args);
        RREGEXP(re)->usecnt--;
    }

    if (pos < 0 && pos != ONIG_MISMATCH) {
        rb_raise(ScanError, "regexp buffer overflow");
    }

    return pos;
}
#endif

/*
 * Callback for rb_reg_onig_match that runs onig_match on the scanner's
 * remaining input.
 *
 * reg      - prepared regex to run.
 * str      - not referenced here; the subject is taken from the scanner.
 * regs     - registers handed to onig_match.
 * args_ptr - the `struct strscanner *` being scanned, passed as `void *`.
 *
 * Returns the value returned by onig_match.
 */
static OnigPosition
strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
{
    struct strscanner *scanner = args_ptr;

    UChar *subject = match_target(scanner);
    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
    UChar *match_at = (UChar *)CURPTR(scanner);

    return onig_match(reg, subject, subject_end, match_at, regs, ONIG_OPTION_NONE);
}

/*
 * Callback for rb_reg_onig_match that runs onig_search on the scanner's
 * remaining input, searching from the current scan pointer up to the end
 * of the remaining input.
 *
 * reg      - prepared regex to run.
 * str      - not referenced here; the subject is taken from the scanner.
 * regs     - registers handed to onig_search.
 * args_ptr - the `struct strscanner *` being scanned, passed as `void *`.
 *
 * Returns the value returned by onig_search.
 */
static OnigPosition
strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
{
    struct strscanner *scanner = args_ptr;

    UChar *subject = match_target(scanner);
    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
    UChar *search_start = (UChar *)CURPTR(scanner);
    /* The search range extends to the end of the remaining input. */
    UChar *search_range = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));

    return onig_search(reg, subject, subject_end, search_start, search_range,
                       regs, ONIG_OPTION_NONE);
}

static VALUE
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
Expand All @@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
}

if (RB_TYPE_P(pattern, T_REGEXP)) {
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
regex_t *re;
long ret;
int tmpreg;

p->regex = pattern;
re = rb_reg_prepare_re(pattern, p->str);
tmpreg = re != RREGEXP_PTR(pattern);
if (!tmpreg) RREGEXP(pattern)->usecnt++;

if (headonly) {
ret = onig_match(re,
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p),
&(p->regs),
ONIG_OPTION_NONE);
}
else {
ret = onig_search(re,
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
&(p->regs),
ONIG_OPTION_NONE);
}
if (!tmpreg) RREGEXP(pattern)->usecnt--;
if (tmpreg) {
if (RREGEXP(pattern)->usecnt) {
onig_free(re);
}
else {
onig_free(RREGEXP_PTR(pattern));
RREGEXP_PTR(pattern) = re;
}
}
OnigPosition ret = rb_reg_onig_match(pattern,
p->str,
headonly ? strscan_match : strscan_search,
(void *)p,
&(p->regs));

if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
if (ret < 0) {
/* not matched */
if (ret == ONIG_MISMATCH) {
return Qnil;
}
}
Expand Down
18 changes: 11 additions & 7 deletions include/ruby/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <stdio.h>

#include "ruby/onigmo.h"
#include "ruby/regex.h"
#include "ruby/internal/core/rmatch.h"
#include "ruby/internal/dllexport.h"
Expand Down Expand Up @@ -105,25 +106,28 @@ long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);
VALUE rb_reg_quote(VALUE str);

/**
* Exercises various checks and preprocesses so that the given regular
* expression can be applied to the given string. The preprocessing here
* includes (but is not limited to), for instance, encoding conversion.
* Runs a regular expression match using function `match`. Performs preparation,
* error handling, and memory cleanup.
*
* @param[in] re Target regular expression.
* @param[in] str What `re` is about to run on.
* @param[in] match The function to run to match `str` against `re`.
* @param[in] args Pointer to arguments to pass into `match`.
* @param[out] regs Registers on a successful match.
* @exception rb_eArgError `re` does not fit for `str`.
* @exception rb_eEncCompatError `re` and `str` are incompatible.
* @exception rb_eRegexpError `re` is malformed.
* @return A preprocessed pattern buffer ready to be applied to `str`.
* @note The return value is managed by our GC. Don't free.
* @return Match position on a successful match, `ONIG_MISMATCH` otherwise.
*
* @internal
*
* The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_
* The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_
* _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
* at this point. Just don't mix the two.
*/
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
OnigPosition rb_reg_onig_match(VALUE re, VALUE str,
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
void *args, struct re_registers *regs);

/**
* Duplicates a match data. This is roughly the same as `onig_region_copy()`,
Expand Down
Loading

0 comments on commit 7193b40

Please sign in to comment.