Skip to content

Commit

Permalink
filterx: Redesigned regexp_search functionality
Browse files Browse the repository at this point in the history
Introduced two new optional flags: keep_zero and list_mode.
The result type no longer switches between dict and list based on the presence of named groups.
The default result type is now dict.
A list result can be forced with list_mode, either via the flag or via the type of the fillable (the fillable's type takes precedence).
Match group zero is now excluded by default unless no other groups are present. This behavior can be overridden using the keep_zero flag.

Additionally, the FilterXReMatchState structure, shared across other regexp functions, has been updated with a generic flags field. This allows functions to pass custom options to the state without incurring additional overhead.

Signed-off-by: shifter <shifter@axoflow.com>
  • Loading branch information
bshifter committed Dec 3, 2024
1 parent b6be861 commit 3e22628
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 12 deletions.
60 changes: 48 additions & 12 deletions lib/filterx/expr-regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,14 @@
FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean)" \
FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME"=(boolean))" \

#define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern)"
/*
 * Argument names of the optional regexp_search() flags, listed in the same
 * order as the FilterXRegexpSearchFlags values they belong to.
 * NOTE(review): DEFINE_FUNC_FLAG_NAMES is not visible here; it presumably
 * defines the FilterXRegexpSearchFlags_NAMES[] table that is looked up by
 * flag value elsewhere in this file -- confirm against filterx/func-flags.h.
 */
DEFINE_FUNC_FLAG_NAMES(FilterXRegexpSearchFlags,
FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME,
FILTERX_REGEXP_SEARCH_LIST_MODE_NAME
);

/*
 * Usage text appended to regexp_search() constructor error messages; it is
 * assembled from the flag name macros so it follows any renaming of the flags.
 */
#define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern, " \
FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME"=(boolean), "\
FILTERX_REGEXP_SEARCH_LIST_MODE_NAME"=(boolean))"

typedef struct FilterXReMatchState_
{
Expand All @@ -50,6 +57,7 @@ typedef struct FilterXReMatchState_
const gchar *lhs_str;
gsize lhs_str_len;
gint rc;
FLAGSET flags;
} FilterXReMatchState;

static void
Expand Down Expand Up @@ -164,14 +172,6 @@ _match(FilterXExpr *lhs_expr, pcre2_code_8 *pattern, FilterXReMatchState *state)
return FALSE;
}

/*
 * Tell whether the compiled pattern declares at least one named capture group.
 *
 * The counter starts at zero, so the function answers FALSE whenever
 * pcre2_pattern_info() leaves it untouched.
 */
static gboolean
_has_named_capture_groups(pcre2_code_8 *pattern)
{
  guint32 named_group_count = 0;

  pcre2_pattern_info(pattern, PCRE2_INFO_NAMECOUNT, &named_group_count);

  return named_group_count != 0;
}

static gboolean
_store_matches_to_list(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable)
{
Expand All @@ -180,6 +180,8 @@ _store_matches_to_list(pcre2_code_8 *pattern, const FilterXReMatchState *state,

for (gint i = 0; i < num_matches; i++)
{
if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO))
continue;
gint begin_index = matches[2 * i];
gint end_index = matches[2 * i + 1];
if (begin_index < 0 || end_index < 0)
Expand Down Expand Up @@ -209,6 +211,9 @@ _store_matches_to_dict(pcre2_code_8 *pattern, const FilterXReMatchState *state,
/* First store all matches with string formatted indexes as keys. */
for (guint32 i = 0; i < num_matches; i++)
{
if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO))
continue;

PCRE2_SIZE begin_index = matches[2 * i];
PCRE2_SIZE end_index = matches[2 * i + 1];
if (begin_index < 0 || end_index < 0)
Expand Down Expand Up @@ -385,6 +390,7 @@ typedef struct FilterXExprRegexpSearchGenerator_
FilterXGeneratorFunction super;
FilterXExpr *lhs;
pcre2_code_8 *pattern;
FLAGSET flags;
} FilterXExprRegexpSearchGenerator;

static gboolean
Expand All @@ -395,6 +401,7 @@ _regexp_search_generator_generate(FilterXExprGenerator *s, FilterXObject *fillab
gboolean result;
FilterXReMatchState state;
_state_init(&state);
state.flags = self->flags;

gboolean matched = _match(self->lhs, self->pattern, &state);
if (!matched)
Expand Down Expand Up @@ -422,10 +429,10 @@ _regexp_search_generator_create_container(FilterXExprGenerator *s, FilterXExpr *
{
FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s;

if (_has_named_capture_groups(self->pattern))
return filterx_generator_create_dict_container(s, fillable_parent);
if (check_flag(self->flags, FILTERX_REGEXP_SEARCH_LIST_MODE))
return filterx_generator_create_list_container(s, fillable_parent);

return filterx_generator_create_list_container(s, fillable_parent);
return filterx_generator_create_dict_container(s, fillable_parent);
}

static gboolean
Expand Down Expand Up @@ -459,6 +466,29 @@ _regexp_search_generator_free(FilterXExpr *s)
filterx_generator_function_free_method(&self->super);
}

/*
 * Read one optional, named boolean argument of regexp_search() and record it
 * in self->flags.
 *
 * self:  generator whose flag set is updated
 * flag:  which flag to look for; must be below FilterXRegexpSearchFlags_MAX
 * args:  function arguments to look the flag up in (by the flag's name)
 * error: receives a FILTERX_FUNCTION_ERROR_CTOR_FAIL error when the argument
 *        is reported as present but could not be evaluated
 *
 * Returns TRUE when the argument is absent (flags are left untouched) or was
 * stored successfully, FALSE when an error has been set.
 */
static gboolean
_extract_optional_arg_flag(FilterXExprRegexpSearchGenerator *self, FilterXRegexpSearchFlags flag,
                           FilterXFunctionArgs *args, GError **error)
{
  g_assert(flag < FilterXRegexpSearchFlags_MAX);

  const gchar *flag_name = FilterXRegexpSearchFlags_NAMES[flag];
  gboolean is_present, has_eval_error;
  gboolean flag_value = filterx_function_args_get_named_literal_boolean(args, flag_name,
                        &is_present, &has_eval_error);

  /* The evaluation status is only consulted once the argument is known to be present. */
  if (is_present && has_eval_error)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "%s argument must be boolean literal. " FILTERX_FUNC_REGEXP_SEARCH_USAGE, flag_name);
      return FALSE;
    }

  if (is_present)
    {
      set_flag(&self->flags, flag, flag_value);
    }

  return TRUE;
}

static gboolean
_extract_search_args(FilterXExprRegexpSearchGenerator *self, FilterXFunctionArgs *args, GError **error)
{
Expand Down Expand Up @@ -504,6 +534,12 @@ filterx_generator_function_regexp_search_new(FilterXFunctionArgs *args, GError *
self->super.super.super.free_fn = _regexp_search_generator_free;
self->super.super.create_container = _regexp_search_generator_create_container;

if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO, args, error))
goto error;

if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_LIST_MODE, args, error))
goto error;

if (!_extract_search_args(self, args, error) ||
!filterx_function_args_check(args, error))
goto error;
Expand Down
11 changes: 11 additions & 0 deletions lib/filterx/expr-regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "filterx/filterx-expr.h"
#include "filterx/expr-generator.h"
#include "filterx/expr-function.h"
#include "filterx/func-flags.h"

#define FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME "jit"
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME "global"
Expand All @@ -35,6 +36,16 @@
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME "newline"
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME "groups"

/*
 * Optional boolean flags of regexp_search():
 *   KEEP_GRP_ZERO - keep match group zero in the result even when other
 *                   groups are present
 *   LIST_MODE     - create a list container for the result instead of a dict
 * NOTE(review): DEFINE_FUNC_FLAGS comes from filterx/func-flags.h, which is not
 * visible here; it presumably declares the enum together with a
 * FilterXRegexpSearchFlags_MAX sentinel -- confirm.
 */
DEFINE_FUNC_FLAGS(FilterXRegexpSearchFlags,
FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO,
FILTERX_REGEXP_SEARCH_LIST_MODE
);

/* Names under which the flags above are passed as named arguments. */
#define FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME "keep_zero"
#define FILTERX_REGEXP_SEARCH_LIST_MODE_NAME "list_mode"

/* Flag name table, looked up by FilterXRegexpSearchFlags value. */
extern const char *FilterXRegexpSearchFlags_NAMES[];

typedef struct FilterXFuncRegexpSubstOpts_
{
gboolean global;
Expand Down

0 comments on commit 3e22628

Please sign in to comment.