diff --git a/lib/filterx/expr-regexp.c b/lib/filterx/expr-regexp.c
index cb8df99ca..fe88776b9 100644
--- a/lib/filterx/expr-regexp.c
+++ b/lib/filterx/expr-regexp.c
@@ -38,7 +38,8 @@
   FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME"=(boolean) " \
   FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME"=(boolean) " \
   FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME"=(boolean) " \
-  FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean))" \
+  FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean) " \
+  FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME"=(boolean))" \
 
 #define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern)"
 
@@ -48,6 +49,7 @@ typedef struct FilterXReMatchState_
   FilterXObject *lhs_obj;
   const gchar *lhs_str;
   gsize lhs_str_len;
+  gint rc; /* return value of the most recent pcre2_match() call */
 } FilterXReMatchState;
 
 static void
@@ -112,6 +114,7 @@ _match_inner(FilterXReMatchState *state, pcre2_code_8 *pattern, gint start_offse
   gint rc = pcre2_match(pattern, (PCRE2_SPTR) state->lhs_str, (PCRE2_SIZE) state->lhs_str_len,
                         (PCRE2_SIZE) start_offset, 0, state->match_data, NULL);
 
+  state->rc = rc;
   if (rc < 0)
     {
       switch (rc)
@@ -543,18 +546,83 @@ _is_zero_length_match(PCRE2_SIZE *ovector)
   return ovector[0] == ovector[1];
 }
 
+/*
+ * Expand the \1..\9 back-references of self->replacement using the capture
+ * groups of the most recent match held in state. The result replaces the
+ * previous content of replacement_string.
+ *
+ * A reference to a group that does not exist or did not take part in the
+ * match is copied verbatim, and so is every other backslash sequence.
+ * Always returns TRUE.
+ */
+static gboolean
+_build_replacement_string_with_match_groups(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state,
+                                            GString *replacement_string)
+{
+  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(state->match_data);
+  g_string_set_size(replacement_string, 0);
+  const gchar *rep_ptr = self->replacement;
+  const gchar *last_ptr = rep_ptr;
+  gint num_grps = state->rc;
+
+  while (*rep_ptr)
+    {
+      if (*rep_ptr != '\\')
+        {
+          rep_ptr++;
+          continue;
+        }
+
+      rep_ptr++;
+      /* a trailing backslash escapes nothing: do not step over the terminating NUL */
+      if (*rep_ptr == '\0')
+        break;
+
+      if (*rep_ptr >= '1' && *rep_ptr <= '9')
+        {
+          gint grp_idx = *rep_ptr - '0';
+          if (grp_idx < num_grps)
+            {
+              PCRE2_SIZE start = ovector[2 * grp_idx];
+              PCRE2_SIZE end = ovector[2 * grp_idx + 1];
+              if (start != PCRE2_UNSET)
+                {
+                  /* flush the literal text before the backslash, then the captured group */
+                  g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr - 1);
+                  last_ptr = rep_ptr + 1;
+                  g_string_append_len(replacement_string, state->lhs_str + start, end - start);
+                }
+            }
+        }
+      rep_ptr++;
+    }
+  g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr);
+  return TRUE;
+}
+
 static FilterXObject *
 _replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state)
 {
   GString *new_value = scratch_buffers_alloc();
   PCRE2_SIZE *ovector = NULL;
   gint pos = 0;
+  const gchar *replacement_string = self->replacement;
+  /* only needed when back-references have to be expanded */
+  GString *rep_str = self->opts.groups ? scratch_buffers_alloc() : NULL;
+
   do
     {
       ovector = pcre2_get_ovector_pointer(state->match_data);
 
+      /* the captured groups differ from match to match, so expand the references for each one */
+      if (rep_str)
+        {
+          _build_replacement_string_with_match_groups(self, state, rep_str);
+          replacement_string = rep_str->str;
+        }
+
       g_string_append_len(new_value, state->lhs_str + pos, _start_offset(ovector) - pos);
-      g_string_append(new_value, self->replacement);
+      g_string_append(new_value, replacement_string);
 
       if (_is_zero_length_match(ovector))
         {
@@ -574,7 +642,7 @@ _replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state)
 
   // handle the very last of zero lenght matches
   if (_is_zero_length_match(ovector))
-    g_string_append(new_value, self->replacement);
+    g_string_append(new_value, replacement_string);
 
   return filterx_string_new(new_value->str, new_value->len);
 }
@@ -689,6 +757,9 @@ _extract_optional_flags(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args,
   if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, &self->opts.utf8, error))
     return FALSE;
 
+  if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME,
+                             &self->opts.groups, error))
+    return FALSE;
   return TRUE;
 }
 
diff --git a/lib/filterx/expr-regexp.h b/lib/filterx/expr-regexp.h
index 0e57b7db9..4d5c43d13 100644
--- a/lib/filterx/expr-regexp.h
+++ b/lib/filterx/expr-regexp.h
@@ -33,6 +33,7 @@
 #define FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME "utf8"
 #define FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME "ignorecase"
 #define FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME "newline"
+#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME "groups"
 
 typedef struct FilterXFuncRegexpSubstOpts_
 {
@@ -41,6 +42,7 @@ typedef struct FilterXFuncRegexpSubstOpts_
   gboolean utf8;
   gboolean ignorecase;
   gboolean newline;
+  gboolean groups;
 } FilterXFuncRegexpSubstOpts;
 
 FilterXExpr *filterx_expr_regexp_match_new(FilterXExpr *lhs, const gchar *pattern);
diff --git a/lib/filterx/tests/test_expr_regexp.c b/lib/filterx/tests/test_expr_regexp.c
index 6fab5a51f..3447b0e40 100644
--- a/lib/filterx/tests/test_expr_regexp.c
+++ b/lib/filterx/tests/test_expr_regexp.c
@@ -276,6 +276,9 @@ _build_subst_func(const gchar *pattern, const gchar *repr, const gchar *str, Fil
   if (opts.utf8)
     args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME,
                                                         filterx_literal_new(filterx_boolean_new(TRUE))));
+  if (opts.groups)
+    args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME,
+                                                        filterx_literal_new(filterx_boolean_new(TRUE))));
 
   GError *err = NULL;
   FilterXExpr *func = filterx_function_regexp_subst_new(filterx_function_args_new(args, NULL), &err);
@@ -527,6 +530,53 @@ Test(filterx_expr_regexp, regexp_subst_match_opt_ignorecase_nojit)
   filterx_object_unref(result_alt);
 }
 
+Test(filterx_expr_regexp, regexp_subst_group_subst)
+{
+  FilterXFuncRegexpSubstOpts opts = {};
+  FilterXObject *result = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts);
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string)));
+  const gchar *res = filterx_string_get_value_ref(result, NULL);
+  cr_assert_str_eq(res, "\\3-\\2-\\1");
+  filterx_object_unref(result);
+
+  FilterXFuncRegexpSubstOpts opts_alt = {.groups = TRUE};
+  FilterXObject *result_alt = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts_alt);
+  cr_assert(filterx_object_is_type(result_alt, &FILTERX_TYPE_NAME(string)));
+  const gchar *res_alt = filterx_string_get_value_ref(result_alt, NULL);
+  cr_assert_str_eq(res_alt, "2022-02-25");
+  filterx_object_unref(result_alt);
+}
+
+Test(filterx_expr_regexp, regexp_subst_group_subst_without_ref)
+{
+  FilterXFuncRegexpSubstOpts opts = {.groups = TRUE};
+  FilterXObject *result = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "group without ref", "25-02-2022", opts);
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string)));
+  const gchar *res = filterx_string_get_value_ref(result, NULL);
+  cr_assert_str_eq(res, "group without ref");
+  filterx_object_unref(result);
+}
+
+Test(filterx_expr_regexp, regexp_subst_group_subst_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.groups = TRUE, .global = TRUE};
+  FilterXObject *result = _sub("(\\w)(\\d)", "\\2\\1", "a1b2c3", opts);
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string)));
+  const gchar *res = filterx_string_get_value_ref(result, NULL);
+  cr_assert_str_eq(res, "1a2b3c");
+  filterx_object_unref(result);
+}
+
+Test(filterx_expr_regexp, regexp_subst_group_subst_trailing_backslash)
+{
+  FilterXFuncRegexpSubstOpts opts = {.groups = TRUE};
+  FilterXObject *result = _sub("(\\d{2})", "\\1\\", "25-02-2022", opts);
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string)));
+  const gchar *res = filterx_string_get_value_ref(result, NULL);
+  cr_assert_str_eq(res, "25\\-02-2022");
+  filterx_object_unref(result);
+}
+
 static void
 setup(void)
 {
diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py
index 53f521f3b..e65b3ae13 100644
--- a/tests/light/functional_tests/filterx/test_filterx.py
+++ b/tests/light/functional_tests/filterx/test_filterx.py
@@ -1938,6 +1938,9 @@ def test_regexp_subst(config, syslog_ng):
     $MSG.orgrp_global = regexp_subst("foobarbaz", "(fo|az)", "!", global=true);
     $MSG.ignore_case_control = regexp_subst("FoObArBaz", "(o|a)", "!", global=true);
     $MSG.ignore_case = regexp_subst("FoObArBaz", "(o|a)", "!", ignorecase=true, global=true);
+    $MSG.groups_off = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "\\3-\\2-\\1");
+    $MSG.groups_on = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "\\3-\\2-\\1", groups=true);
+    $MSG.mixed_grps = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "foo:\\3-\\2-\\1:bar:baz", groups=true);
 """,
     )
     syslog_ng.start(config)
@@ -1956,7 +1959,10 @@ def test_regexp_subst(config, syslog_ng):
         r""""zero_length_match_global":"!f!o!o!b!a!r!b!a!z!","""
         r""""orgrp_global":"!obarb!","""
         r""""ignore_case_control":"F!ObArB!z","""
-        r""""ignore_case":"F!!b!rB!z"}""" + "\n"
+        r""""ignore_case":"F!!b!rB!z","""
+        r""""groups_off":"\\3-\\2-\\1","""
+        r""""groups_on":"2022-02-25","""
+        r""""mixed_grps":"foo:2022-02-25:bar:baz"}""" + "\n"
     )
 
     assert file_true.read_log() == exp