diff --git a/lib/filterx/CMakeLists.txt b/lib/filterx/CMakeLists.txt index 9dbdc550b..40484cb38 100644 --- a/lib/filterx/CMakeLists.txt +++ b/lib/filterx/CMakeLists.txt @@ -16,6 +16,9 @@ set(FILTERX_HEADERS filterx/expr-null-coalesce.h filterx/expr-plus-generator.h filterx/expr-plus.h + filterx/expr-regexp-common.h + filterx/expr-regexp-search.h + filterx/expr-regexp-subst.h filterx/expr-regexp.h filterx/expr-set-subscript.h filterx/expr-setattr.h @@ -79,6 +82,9 @@ set(FILTERX_SOURCES filterx/expr-null-coalesce.c filterx/expr-plus-generator.c filterx/expr-plus.c + filterx/expr-regexp-common.c + filterx/expr-regexp-search.c + filterx/expr-regexp-subst.c filterx/expr-regexp.c filterx/expr-set-subscript.c filterx/expr-setattr.c diff --git a/lib/filterx/Makefile.am b/lib/filterx/Makefile.am index 880496194..fc876393b 100644 --- a/lib/filterx/Makefile.am +++ b/lib/filterx/Makefile.am @@ -18,6 +18,9 @@ filterxinclude_HEADERS = \ lib/filterx/expr-null-coalesce.h \ lib/filterx/expr-plus-generator.h \ lib/filterx/expr-plus.h \ + lib/filterx/expr-regexp-common.h \ + lib/filterx/expr-regexp-search.h \ + lib/filterx/expr-regexp-subst.h \ lib/filterx/expr-regexp.h \ lib/filterx/expr-set-subscript.h \ lib/filterx/expr-setattr.h \ @@ -81,6 +84,9 @@ filterx_sources = \ lib/filterx/expr-null-coalesce.c \ lib/filterx/expr-plus-generator.c \ lib/filterx/expr-plus.c \ + lib/filterx/expr-regexp-common.c \ + lib/filterx/expr-regexp-search.c \ + lib/filterx/expr-regexp-subst.c \ lib/filterx/expr-regexp.c \ lib/filterx/expr-set-subscript.c \ lib/filterx/expr-setattr.c \ diff --git a/lib/filterx/expr-regexp-common.c b/lib/filterx/expr-regexp-common.c new file mode 100644 index 000000000..6f352fda9 --- /dev/null +++ b/lib/filterx/expr-regexp-common.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by 
the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#include "expr-regexp-common.h" +#include "filterx/object-extractor.h" + +pcre2_code_8 * +filterx_regexp_compile_pattern(const gchar *pattern, gboolean jit_enabled, gint opts) +{ + gint rc; + PCRE2_SIZE error_offset; + gint flags = opts | PCRE2_DUPNAMES; + + pcre2_code_8 *compiled = pcre2_compile((PCRE2_SPTR) pattern, PCRE2_ZERO_TERMINATED, flags, &rc, &error_offset, NULL); + if (!compiled) + { + PCRE2_UCHAR error_message[128]; + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_error("FilterX: Failed to compile regexp pattern", + evt_tag_str("pattern", pattern), + evt_tag_str("error", (const gchar *) error_message), + evt_tag_int("error_offset", (gint) error_offset)); + return NULL; + } + + if (jit_enabled) + { + rc = pcre2_jit_compile(compiled, PCRE2_JIT_COMPLETE); + if (rc < 0) + { + PCRE2_UCHAR error_message[128]; + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_debug("FilterX: Failed to JIT compile regular expression", + evt_tag_str("pattern", pattern), + evt_tag_str("error", (const gchar *) error_message)); + } + } + + return compiled; +} + +pcre2_code_8 * +filterx_regexp_compile_pattern_defaults(const gchar *pattern) +{ + return 
filterx_regexp_compile_pattern(pattern, TRUE, 0); +} + +void +filterx_expr_rematch_state_init(FilterXReMatchState *state) +{ + memset(state, 0, sizeof(FilterXReMatchState)); +} + +void +filterx_expr_rematch_state_cleanup(FilterXReMatchState *state) +{ + if (state->match_data) + pcre2_match_data_free(state->match_data); + filterx_object_unref(state->lhs_obj); + memset(state, 0, sizeof(FilterXReMatchState)); +} + +gboolean +filterx_regexp_extract_optional_arg_flag(FLAGSET *flags, const gchar **flag_names, guint64 flags_max, FLAGSET flag, + const gchar *usage, + FilterXFunctionArgs *args, GError **error) +{ + g_assert(flags); + gboolean exists, eval_error; + g_assert(flag < flags_max); + const gchar *arg_name = flag_names[flag]; + gboolean value = filterx_function_args_get_named_literal_boolean(args, arg_name, &exists, &eval_error); + if (!exists) + return TRUE; + + if (eval_error) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "%s argument must be boolean literal. %s", arg_name, usage); + return FALSE; + } + + set_flag(flags, flag, value); + + return TRUE; +} + +gboolean +filterx_regexp_match(FilterXReMatchState *state, pcre2_code_8 *pattern, gint start_offset) +{ + gint rc = pcre2_match(pattern, (PCRE2_SPTR) state->lhs_str, (PCRE2_SIZE) state->lhs_str_len, (PCRE2_SIZE) start_offset, + 0, + state->match_data, NULL); + state->rc = rc; + if (rc < 0) + { + switch (rc) + { + case PCRE2_ERROR_NOMATCH: + return FALSE; + default: + /* Handle other special cases */ + msg_error("FilterX: Error while matching regexp", evt_tag_int("error_code", rc)); + goto error; + } + } + else if (rc == 0) + { + msg_error("FilterX: Error while storing matching substrings, more than 256 capture groups encountered"); + goto error; + } + + return TRUE; + +error: + return FALSE; +} + +/* + * Returns whether lhs matched the pattern. + * Populates state if no error happened. 
+ */ +gboolean +filterx_regexp_match_eval(FilterXExpr *lhs_expr, pcre2_code_8 *pattern, FilterXReMatchState *state) +{ + state->lhs_obj = filterx_expr_eval(lhs_expr); + if (!state->lhs_obj) + goto error; + + if (!filterx_object_extract_string_ref(state->lhs_obj, &state->lhs_str, &state->lhs_str_len)) + { + msg_error("FilterX: Regexp matching left hand side must be string type", + evt_tag_str("type", state->lhs_obj->type->name)); + goto error; + } + + state->match_data = pcre2_match_data_create_from_pattern(pattern, NULL); + return filterx_regexp_match(state, pattern, 0); +error: + filterx_expr_rematch_state_cleanup(state); + return FALSE; +} diff --git a/lib/filterx/expr-regexp-common.h b/lib/filterx/expr-regexp-common.h new file mode 100644 index 000000000..d2c39a9be --- /dev/null +++ b/lib/filterx/expr-regexp-common.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. 
#ifndef FILTERX_EXPR_REGEXP_COMMON_H_INCLUDED
#define FILTERX_EXPR_REGEXP_COMMON_H_INCLUDED

#include "compat/pcre.h"
#include "filterx/object-primitive.h"
#include "filterx/func-flags.h"
#include "filterx/expr-function.h"

/*
 * Working state of a single regexp evaluation, shared by the regexp
 * search and substitution expressions.
 *
 * Initialize with filterx_expr_rematch_state_init() and release with
 * filterx_expr_rematch_state_cleanup().
 */
typedef struct FilterXReMatchState_
{
  pcre2_match_data *match_data;   /* PCRE2 result buffer, freed by the cleanup function */
  FilterXObject *lhs_obj;         /* evaluated subject, unreferenced by the cleanup function */
  const gchar *lhs_str;           /* string view extracted from lhs_obj */
  gsize lhs_str_len;              /* length of lhs_str */
  gint rc;                        /* return value of the last pcre2_match() call */
  FLAGSET flags;                  /* caller specific flags (regexp_search copies its own flags here) */
} FilterXReMatchState;

/* Compile `pattern` with `opts` (PCRE2_DUPNAMES is always added), optionally JIT compiling it. */
pcre2_code_8 *filterx_regexp_compile_pattern(const gchar *pattern, gboolean jit_enabled, gint opts);
/* Same as above with JIT enabled and no extra options. */
pcre2_code_8 *filterx_regexp_compile_pattern_defaults(const gchar *pattern);

/* Zero the state. */
void filterx_expr_rematch_state_init(FilterXReMatchState *state);
/* Free match_data, drop the lhs_obj reference and zero the state. */
void filterx_expr_rematch_state_cleanup(FilterXReMatchState *state);

/*
 * Store the optional boolean named argument flag_names[flag] into `flags`.
 * Returns FALSE and sets `error` if the argument exists but is not a boolean literal.
 */
gboolean filterx_regexp_extract_optional_arg_flag(FLAGSET *flags, const gchar **flag_names, guint64 flags_max,
                                                  FLAGSET flag, const gchar *usage, FilterXFunctionArgs *args, GError **error);

/* Match state->lhs_str from start_offset; TRUE only on a successful match, state->rc holds the raw result. */
gboolean filterx_regexp_match(FilterXReMatchState *state, pcre2_code_8 *pattern, gint start_offset);
/* Evaluate lhs_expr into `state` and match it from offset 0; returns whether it matched. */
gboolean filterx_regexp_match_eval(FilterXExpr *lhs_expr, pcre2_code_8 *pattern, FilterXReMatchState *state);

/* Start offset of the whole match (ovector[0]); note the PCRE2_SIZE value is narrowed to gint. */
static inline gint
match_start_offset(PCRE2_SIZE *ovector)
{
  return ovector[0];
}

/* End offset of the whole match (ovector[1]); note the PCRE2_SIZE value is narrowed to gint. */
static inline gint
match_end_offset(PCRE2_SIZE *ovector)
{
  return ovector[1];
}

/* TRUE when the whole match is empty, i.e. it starts and ends at the same offset. */
static inline gboolean
is_zero_length_match(PCRE2_SIZE *ovector)
{
  return ovector[0] == ovector[1];
}

#endif
version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#include "expr-regexp-search.h" +#include "filterx/expr-regexp.h" +#include "filterx/object-primitive.h" +#include "filterx/object-extractor.h" +#include "filterx/object-string.h" +#include "filterx/object-list-interface.h" +#include "filterx/object-dict-interface.h" +#include "filterx/expr-function.h" +#include "filterx/filterx-object-istype.h" +#include "filterx/filterx-ref.h" +#include "filterx/expr-regexp-common.h" +#include "compat/pcre.h" +#include "scratch-buffers.h" + +DEFINE_FUNC_FLAG_NAMES(FilterXRegexpSearchFlags, + FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME, + FILTERX_REGEXP_SEARCH_LIST_MODE_NAME + ); + +#define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern, " \ +FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME"=(boolean), "\ +FILTERX_REGEXP_SEARCH_LIST_MODE_NAME"=(boolean))" + +typedef struct FilterXExprRegexpSearchGenerator_ +{ + FilterXGeneratorFunction super; + FilterXExpr *lhs; + pcre2_code_8 *pattern; + FLAGSET flags; +} FilterXExprRegexpSearchGenerator; + +static gboolean +_store_matches_to_list(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) +{ + guint32 num_matches = pcre2_get_ovector_count(state->match_data); + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(state->match_data); + + for (gint i = 0; i < num_matches; i++) + { + 
if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)) + continue; + gint begin_index = matches[2 * i]; + gint end_index = matches[2 * i + 1]; + if (begin_index < 0 || end_index < 0) + continue; + + FilterXObject *value = filterx_string_new(state->lhs_str + begin_index, end_index - begin_index); + gboolean success = filterx_list_append(fillable, &value); + filterx_object_unref(value); + + if (!success) + { + msg_error("FilterX: Failed to append regexp match to list", evt_tag_int("index", i)); + return FALSE; + } + } + + return TRUE; +} + +static gboolean +_store_matches_to_dict(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) +{ + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(state->match_data); + guint32 num_matches = pcre2_get_ovector_count(state->match_data); + gchar num_str_buf[G_ASCII_DTOSTR_BUF_SIZE]; + + /* First store all matches with string formatted indexes as keys. */ + for (guint32 i = 0; i < num_matches; i++) + { + if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)) + continue; + + PCRE2_SIZE begin_index = matches[2 * i]; + PCRE2_SIZE end_index = matches[2 * i + 1]; + if (begin_index < 0 || end_index < 0) + continue; + + g_snprintf(num_str_buf, sizeof(num_str_buf), "%" G_GUINT32_FORMAT, i); + FilterXObject *key = filterx_string_new(num_str_buf, -1); + FilterXObject *value = filterx_string_new(state->lhs_str + begin_index, end_index - begin_index); + + gboolean success = filterx_object_set_subscript(fillable, key, &value); + + filterx_object_unref(key); + filterx_object_unref(value); + + if (!success) + { + msg_error("FilterX: Failed to add regexp match to dict", evt_tag_str("key", num_str_buf)); + return FALSE; + } + } + + gchar *name_table = NULL; + guint32 name_entry_size = 0; + guint32 namecount = 0; + pcre2_pattern_info(pattern, PCRE2_INFO_NAMETABLE, &name_table); + pcre2_pattern_info(pattern, PCRE2_INFO_NAMEENTRYSIZE, 
&name_entry_size); + pcre2_pattern_info(pattern, PCRE2_INFO_NAMECOUNT, &namecount); + + /* Rename named matches. */ + for (guint32 i = 0; i < namecount; i++, name_table += name_entry_size) + { + int n = (name_table[0] << 8) | name_table[1]; + PCRE2_SIZE begin_index = matches[2 * n]; + PCRE2_SIZE end_index = matches[2 * n + 1]; + const gchar *namedgroup_name = name_table + 2; + + if (begin_index < 0 || end_index < 0) + continue; + + g_snprintf(num_str_buf, sizeof(num_str_buf), "%" G_GUINT32_FORMAT, n); + FilterXObject *num_key = filterx_string_new(num_str_buf, -1); + FilterXObject *key = filterx_string_new(namedgroup_name, -1); + FilterXObject *value = filterx_object_get_subscript(fillable, num_key); + + gboolean success = filterx_object_set_subscript(fillable, key, &value); + g_assert(filterx_object_unset_key(fillable, num_key)); + + filterx_object_unref(key); + filterx_object_unref(num_key); + filterx_object_unref(value); + + if (!success) + { + msg_error("FilterX: Failed to add regexp match to dict", evt_tag_str("key", namedgroup_name)); + return FALSE; + } + } + + return TRUE; +} + +static gboolean +_store_matches(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) +{ + fillable = filterx_ref_unwrap_rw(fillable); + + if (filterx_object_is_type(fillable, &FILTERX_TYPE_NAME(list))) + return _store_matches_to_list(pattern, state, fillable); + + if (filterx_object_is_type(fillable, &FILTERX_TYPE_NAME(dict))) + return _store_matches_to_dict(pattern, state, fillable); + + msg_error("FilterX: Failed to store regexp match data, invalid fillable type", + evt_tag_str("type", fillable->type->name)); + return FALSE; +} + +static gboolean +_regexp_search_generator_generate(FilterXExprGenerator *s, FilterXObject *fillable) +{ + FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; + + gboolean result; + FilterXReMatchState state; + filterx_expr_rematch_state_init(&state); + state.flags = self->flags; + + gboolean 
matched = filterx_regexp_match_eval(self->lhs, self->pattern, &state); + if (!matched) + { + result = TRUE; + goto exit; + } + + if (!state.match_data) + { + /* Error happened during matching. */ + result = FALSE; + goto exit; + } + + result = _store_matches(self->pattern, &state, fillable); + +exit: + filterx_expr_rematch_state_cleanup(&state); + return result; +} + +static FilterXObject * +_regexp_search_generator_create_container(FilterXExprGenerator *s, FilterXExpr *fillable_parent) +{ + FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; + + if (check_flag(self->flags, FILTERX_REGEXP_SEARCH_LIST_MODE)) + return filterx_generator_create_list_container(s, fillable_parent); + + return filterx_generator_create_dict_container(s, fillable_parent); +} + +static gboolean +_regexp_search_generator_init(FilterXExpr *s, GlobalConfig *cfg) +{ + FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; + + if (!filterx_expr_init(self->lhs, cfg)) + return FALSE; + + return filterx_generator_init_method(s, cfg); +} + +static void +_regexp_search_generator_deinit(FilterXExpr *s, GlobalConfig *cfg) +{ + FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; + + filterx_expr_deinit(self->lhs, cfg); + filterx_generator_deinit_method(s, cfg); +} + +static void +_regexp_search_generator_free(FilterXExpr *s) +{ + FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; + + filterx_expr_unref(self->lhs); + if (self->pattern) + pcre2_code_free(self->pattern); + filterx_generator_function_free_method(&self->super); +} + +static gboolean +_extract_optional_arg_flag(FilterXExprRegexpSearchGenerator *self, FilterXRegexpSearchFlags flag, + FilterXFunctionArgs *args, GError **error) +{ + return filterx_regexp_extract_optional_arg_flag(&self->flags, FilterXRegexpSearchFlags_NAMES, + FilterXRegexpSearchFlags_MAX, flag, FILTERX_FUNC_REGEXP_SEARCH_USAGE, args, error); +} + +static gboolean 
+_extract_search_args(FilterXExprRegexpSearchGenerator *self, FilterXFunctionArgs *args, GError **error) +{ + if (filterx_function_args_len(args) != 2) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "invalid number of arguments. " FILTERX_FUNC_REGEXP_SEARCH_USAGE); + return FALSE; + } + + self->lhs = filterx_function_args_get_expr(args, 0); + + const gchar *pattern = filterx_function_args_get_literal_string(args, 1, NULL); + if (!pattern) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "pattern must be string literal. " FILTERX_FUNC_REGEXP_SEARCH_USAGE); + return FALSE; + } + + self->pattern = filterx_regexp_compile_pattern_defaults(pattern); + if (!self->pattern) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "failed to compile pattern. " FILTERX_FUNC_REGEXP_SEARCH_USAGE); + return FALSE; + } + + return TRUE; + +} + +/* Takes reference of lhs */ +FilterXExpr * +filterx_generator_function_regexp_search_new(FilterXFunctionArgs *args, GError **error) +{ + FilterXExprRegexpSearchGenerator *self = g_new0(FilterXExprRegexpSearchGenerator, 1); + + filterx_generator_function_init_instance(&self->super, "regexp_search"); + self->super.super.generate = _regexp_search_generator_generate; + self->super.super.super.init = _regexp_search_generator_init; + self->super.super.super.deinit = _regexp_search_generator_deinit; + self->super.super.super.free_fn = _regexp_search_generator_free; + self->super.super.create_container = _regexp_search_generator_create_container; + + if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO, args, error)) + goto error; + + if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_LIST_MODE, args, error)) + goto error; + + if (!_extract_search_args(self, args, error) || + !filterx_function_args_check(args, error)) + goto error; + + filterx_function_args_free(args); + return &self->super.super.super; + +error: + 
filterx_function_args_free(args); + filterx_expr_unref(&self->super.super.super); + return NULL; +} diff --git a/lib/filterx/expr-regexp-search.h b/lib/filterx/expr-regexp-search.h new file mode 100644 index 000000000..3b0468628 --- /dev/null +++ b/lib/filterx/expr-regexp-search.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. 
#ifndef FILTERX_EXPR_REGEXP_SEARCH_H_INCLUDED
#define FILTERX_EXPR_REGEXP_SEARCH_H_INCLUDED

#include "filterx/filterx-expr.h"
#include "filterx/expr-generator.h"
#include "filterx/expr-function.h"
#include "filterx/func-flags.h"

/*
 * Optional boolean flags of regexp_search().  The order has to match the
 * name list passed to DEFINE_FUNC_FLAG_NAMES() in expr-regexp-search.c,
 * because the flag value is used as an index into the name array.
 */
DEFINE_FUNC_FLAGS(FilterXRegexpSearchFlags,
                  FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO,
                  FILTERX_REGEXP_SEARCH_LIST_MODE
                 );

/* Named argument that keeps group zero (the whole match) in the result. */
#define FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME "keep_zero"
/* Named argument that makes the generator create a list instead of a dict. */
#define FILTERX_REGEXP_SEARCH_LIST_MODE_NAME "list_mode"

/* Argument names indexed by FilterXRegexpSearchFlags. */
extern const char *FilterXRegexpSearchFlags_NAMES[];

/* Construct the regexp_search() generator; consumes `args`, returns NULL and sets `error` on failure. */
FilterXExpr *filterx_generator_function_regexp_search_new(FilterXFunctionArgs *args, GError **error);

#endif
+ * + */ + +#include "expr-regexp-subst.h" +#include "filterx/expr-regexp.h" +#include "filterx/object-primitive.h" +#include "filterx/object-extractor.h" +#include "filterx/object-string.h" +#include "filterx/object-list-interface.h" +#include "filterx/object-dict-interface.h" +#include "filterx/expr-function.h" +#include "filterx/filterx-object-istype.h" +#include "filterx/filterx-ref.h" +#include "filterx/expr-regexp-common.h" +#include "compat/pcre.h" +#include "scratch-buffers.h" + +DEFINE_FUNC_FLAG_NAMES(FilterXRegexpSubstFlags, + FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME, + FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME, + FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, + FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME, + FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME, + FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME + ); + +#define FILTERX_FUNC_REGEXP_SUBST_USAGE "Usage: regexp_subst(string, pattern, replacement, " \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME"=(boolean) " \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME"=(boolean) " \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME"=(boolean) " \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME"=(boolean) " \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean)" \ + FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME"=(boolean))" \ + + +typedef struct FilterXFuncRegexpSubst_ +{ + FilterXFunction super; + FilterXExpr *string_expr; + pcre2_code_8 *pattern; + gchar *replacement; + FLAGSET flags; +} FilterXFuncRegexpSubst; + +static gboolean +_build_replacement_stirng_with_match_groups(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state, + GString *replacement_string) +{ + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(state->match_data); + g_string_set_size(replacement_string, 0); + const gchar *rep_ptr = self->replacement; + const gchar *last_ptr = rep_ptr; + gint num_grps = state->rc; + + while (*rep_ptr) + { + if (*rep_ptr == '\\') + { + rep_ptr++; + if (*rep_ptr >= '1' && *rep_ptr <= '9') + { + gint grp_idx = *rep_ptr - '0'; + 
if (grp_idx < num_grps) + { + PCRE2_SIZE start = ovector[2 * grp_idx]; + PCRE2_SIZE end = ovector[2 * grp_idx + 1]; + if (start != PCRE2_UNSET) + { + g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr - 1); + last_ptr = rep_ptr + 1; + size_t group_len = end - start; + g_string_append_len(replacement_string, state->lhs_str + start, group_len); + } + } + } + rep_ptr++; + } + else + rep_ptr++; + } + g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr); + return TRUE; +} + +static FilterXObject * +_replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state) +{ + GString *new_value = scratch_buffers_alloc(); + PCRE2_SIZE *ovector = NULL; + gint pos = 0; + const gchar *replacement_string = self->replacement; + + if (check_flag(self->flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS)) + { + GString *rep_str = scratch_buffers_alloc(); + _build_replacement_stirng_with_match_groups(self, state, rep_str); + replacement_string = rep_str->str; + } + + do + { + ovector = pcre2_get_ovector_pointer(state->match_data); + + g_string_append_len(new_value, state->lhs_str + pos, match_start_offset(ovector) - pos); + g_string_append(new_value, replacement_string); + + if (is_zero_length_match(ovector)) + { + g_string_append_len(new_value, state->lhs_str + pos, 1); + pos++; + } + else + pos = match_end_offset(ovector); + + if (!filterx_regexp_match(state, self->pattern, pos)) + break; + } + while ((pos < state->lhs_str_len) && check_flag(self->flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL)); + + // add the rest of the string + g_string_append_len(new_value, state->lhs_str + pos, state->lhs_str_len - pos); + + // handle the very last of zero lenght matches + if (is_zero_length_match(ovector)) + g_string_append(new_value, replacement_string); + + return filterx_string_new(new_value->str, new_value->len); +} + +static FilterXObject * +_subst_eval(FilterXExpr *s) +{ + FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; + + 
FilterXObject *result = NULL; + FilterXReMatchState state; + filterx_expr_rematch_state_init(&state); + + gboolean matched = filterx_regexp_match_eval(self->string_expr, self->pattern, &state); + if (!matched) + { + result = filterx_object_ref(state.lhs_obj); + goto exit; + } + + if (!state.match_data) + { + /* Error happened during matching. */ + result = NULL; + goto exit; + } + + result = _replace_matches(self, &state); + +exit: + filterx_expr_rematch_state_cleanup(&state); + return result; +} + +static FilterXExpr * +_extract_subst_string_expr_arg(FilterXFunctionArgs *args, GError **error) +{ + return filterx_function_args_get_expr(args, 0); +} + +static gint +_create_compile_opts(FLAGSET flags) +{ + gboolean utf8 = check_flag(flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8); + gboolean ignorecase = check_flag(flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE); + gboolean newline = check_flag(flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE); + + gint res = 0; + res ^= (-utf8 ^ res) & PCRE2_NO_UTF_CHECK; + res ^= (-ignorecase ^ res) & PCRE2_CASELESS; + res ^= (-newline ^ res) & PCRE2_NEWLINE_ANYCRLF; + return res; +} + +static pcre2_code_8 * +_extract_subst_pattern_arg(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) +{ + const gchar *pattern = filterx_function_args_get_literal_string(args, 1, NULL); + if (!pattern) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "argument must be a string literal: pattern. 
" FILTERX_FUNC_REGEXP_SUBST_USAGE); + return NULL; + } + + return filterx_regexp_compile_pattern(pattern, check_flag(self->flags, FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT), + _create_compile_opts(self->flags)); +} + +static gchar * +_extract_subst_replacement_arg(FilterXFunctionArgs *args, GError **error) +{ + const gchar *replacement = filterx_function_args_get_literal_string(args, 2, NULL); + if (!replacement) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "argument must be a string literal: replacement. " FILTERX_FUNC_REGEXP_SUBST_USAGE); + return NULL; + } + + return g_strdup(replacement); +} + +static gboolean +_extract_optional_arg_flag(FilterXFuncRegexpSubst *self, FilterXRegexpSubstFlags flag, + FilterXFunctionArgs *args, GError **error) +{ + return filterx_regexp_extract_optional_arg_flag(&self->flags, FilterXRegexpSubstFlags_NAMES, + FilterXRegexpSubstFlags_MAX, flag, FILTERX_FUNC_REGEXP_SUBST_USAGE, args, error); +} + +static gboolean +_extract_optional_flags(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) +{ + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL, args, error)) + return FALSE; + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT, args, error)) + return FALSE; + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE, args, error)) + return FALSE; + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE, args, error)) + return FALSE; + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8, args, error)) + return FALSE; + if (!_extract_optional_arg_flag(self, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS, args, error)) + return FALSE; + return TRUE; +} + +static gboolean +_extract_subst_args(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) +{ + if (filterx_function_args_len(args) != 3) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, 
FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "invalid number of arguments. " FILTERX_FUNC_REGEXP_SUBST_USAGE); + return FALSE; + } + + self->string_expr = _extract_subst_string_expr_arg(args, error); + if (!self->string_expr) + return FALSE; + + if (!_extract_optional_flags(self, args, error)) + return FALSE; + + self->pattern = _extract_subst_pattern_arg(self, args, error); + if (!self->pattern) + return FALSE; + + self->replacement = _extract_subst_replacement_arg(args, error); + if (!self->replacement) + return FALSE; + + + return TRUE; +} + +static gboolean +_subst_init(FilterXExpr *s, GlobalConfig *cfg) +{ + FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; + + if (!filterx_expr_init(self->string_expr, cfg)) + return FALSE; + + return filterx_function_init_method(&self->super, cfg); +} + +static void +_subst_deinit(FilterXExpr *s, GlobalConfig *cfg) +{ + FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; + filterx_expr_deinit(self->string_expr, cfg); + filterx_function_deinit_method(&self->super, cfg); +} + +static void +_subst_free(FilterXExpr *s) +{ + FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; + filterx_expr_unref(self->string_expr); + if (self->pattern) + pcre2_code_free(self->pattern); + g_free(self->replacement); + filterx_function_free_method(&self->super); +} + +FilterXExpr * +filterx_function_regexp_subst_new(FilterXFunctionArgs *args, GError **error) +{ + FilterXFuncRegexpSubst *self = g_new0(FilterXFuncRegexpSubst, 1); + filterx_function_init_instance(&self->super, "regexp_subst"); + self->super.super.eval = _subst_eval; + self->super.super.init = _subst_init; + self->super.super.deinit = _subst_deinit; + self->super.super.free_fn = _subst_free; + + reset_flags(&self->flags, FLAG_VAL(FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT)); + if (!_extract_subst_args(self, args, error) || + !filterx_function_args_check(args, error)) + goto error; + + filterx_function_args_free(args); + return &self->super.super; + +error: + 
filterx_function_args_free(args); + filterx_expr_unref(&self->super.super); + return NULL; +} + +gboolean +filterx_regexp_subst_is_jit_enabled(FilterXExpr *s) +{ + g_assert(s); + FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *)s; + PCRE2_SIZE jit_size; + int info_result = pcre2_pattern_info(self->pattern, PCRE2_INFO_JITSIZE, &jit_size); + return info_result == 0 && jit_size > 0; +} diff --git a/lib/filterx/expr-regexp-subst.h b/lib/filterx/expr-regexp-subst.h new file mode 100644 index 000000000..cfe6661ed --- /dev/null +++ b/lib/filterx/expr-regexp-subst.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. 
+ * + */ + +#ifndef FILTERX_EXPR_REGEXP_SUBST_H_INCLUDED +#define FILTERX_EXPR_REGEXP_SUBST_H_INCLUDED + +#include "filterx/filterx-expr.h" +#include "filterx/expr-generator.h" +#include "filterx/expr-function.h" +#include "filterx/func-flags.h" + +DEFINE_FUNC_FLAGS(FilterXRegexpSubstFlags, + FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT, + FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL, + FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8, + FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE, + FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE, + FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS + ); + +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME "jit" +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME "global" +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME "utf8" +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME "ignorecase" +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME "newline" +#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME "groups" + +extern const char *FilterXRegexpSubstFlags_NAMES[]; + +FilterXExpr *filterx_function_regexp_subst_new(FilterXFunctionArgs *args, GError **error); +gboolean filterx_regexp_subst_is_jit_enabled(FilterXExpr *s); + +#endif diff --git a/lib/filterx/expr-regexp.c b/lib/filterx/expr-regexp.c index f55bb25b9..cc02da193 100644 --- a/lib/filterx/expr-regexp.c +++ b/lib/filterx/expr-regexp.c @@ -32,265 +32,7 @@ #include "filterx/filterx-ref.h" #include "compat/pcre.h" #include "scratch-buffers.h" - -#define FILTERX_FUNC_REGEXP_SUBST_USAGE "Usage: regexp_subst(string, pattern, replacement, " \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME"=(boolean) " \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME"=(boolean) " \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME"=(boolean) " \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME"=(boolean) " \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean)" \ - FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME"=(boolean))" \ - -DEFINE_FUNC_FLAG_NAMES(FilterXRegexpSearchFlags, - FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME, - FILTERX_REGEXP_SEARCH_LIST_MODE_NAME - 
); - -#define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern, " \ -FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME"=(boolean), "\ -FILTERX_REGEXP_SEARCH_LIST_MODE_NAME"=(boolean))" - -typedef struct FilterXReMatchState_ -{ - pcre2_match_data *match_data; - FilterXObject *lhs_obj; - const gchar *lhs_str; - gsize lhs_str_len; - gint rc; - FLAGSET flags; -} FilterXReMatchState; - -static void -_state_init(FilterXReMatchState *state) -{ - memset(state, 0, sizeof(FilterXReMatchState)); -} - -static void -_state_cleanup(FilterXReMatchState *state) -{ - if (state->match_data) - pcre2_match_data_free(state->match_data); - filterx_object_unref(state->lhs_obj); - memset(state, 0, sizeof(FilterXReMatchState)); -} - -static pcre2_code_8 * -_compile_pattern(const gchar *pattern, gboolean jit_enabled, gint opts) -{ - gint rc; - PCRE2_SIZE error_offset; - gint flags = opts | PCRE2_DUPNAMES; - - pcre2_code_8 *compiled = pcre2_compile((PCRE2_SPTR) pattern, PCRE2_ZERO_TERMINATED, flags, &rc, &error_offset, NULL); - if (!compiled) - { - PCRE2_UCHAR error_message[128]; - pcre2_get_error_message(rc, error_message, sizeof(error_message)); - msg_error("FilterX: Failed to compile regexp pattern", - evt_tag_str("pattern", pattern), - evt_tag_str("error", (const gchar *) error_message), - evt_tag_int("error_offset", (gint) error_offset)); - return NULL; - } - - if (jit_enabled) - { - rc = pcre2_jit_compile(compiled, PCRE2_JIT_COMPLETE); - if (rc < 0) - { - PCRE2_UCHAR error_message[128]; - pcre2_get_error_message(rc, error_message, sizeof(error_message)); - msg_debug("FilterX: Failed to JIT compile regular expression", - evt_tag_str("pattern", pattern), - evt_tag_str("error", (const gchar *) error_message)); - } - } - - return compiled; -} - -static pcre2_code_8 * -_compile_pattern_defaults(const gchar *pattern) -{ - return _compile_pattern(pattern, TRUE, 0); -} - -static gboolean -_match_inner(FilterXReMatchState *state, pcre2_code_8 *pattern, gint start_offset) -{ - gint 
rc = pcre2_match(pattern, (PCRE2_SPTR) state->lhs_str, (PCRE2_SIZE) state->lhs_str_len, (PCRE2_SIZE) start_offset, - 0, - state->match_data, NULL); - state->rc = rc; - if (rc < 0) - { - switch (rc) - { - case PCRE2_ERROR_NOMATCH: - return FALSE; - default: - /* Handle other special cases */ - msg_error("FilterX: Error while matching regexp", evt_tag_int("error_code", rc)); - goto error; - } - } - else if (rc == 0) - { - msg_error("FilterX: Error while storing matching substrings, more than 256 capture groups encountered"); - goto error; - } - - return TRUE; - -error: - return FALSE; -} - -/* - * Returns whether lhs matched the pattern. - * Populates state if no error happened. - */ -static gboolean -_match(FilterXExpr *lhs_expr, pcre2_code_8 *pattern, FilterXReMatchState *state) -{ - state->lhs_obj = filterx_expr_eval(lhs_expr); - if (!state->lhs_obj) - goto error; - - if (!filterx_object_extract_string_ref(state->lhs_obj, &state->lhs_str, &state->lhs_str_len)) - { - msg_error("FilterX: Regexp matching left hand side must be string type", - evt_tag_str("type", state->lhs_obj->type->name)); - goto error; - } - - state->match_data = pcre2_match_data_create_from_pattern(pattern, NULL); - return _match_inner(state, pattern, 0); -error: - _state_cleanup(state); - return FALSE; -} - -static gboolean -_store_matches_to_list(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) -{ - guint32 num_matches = pcre2_get_ovector_count(state->match_data); - PCRE2_SIZE *matches = pcre2_get_ovector_pointer(state->match_data); - - for (gint i = 0; i < num_matches; i++) - { - if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)) - continue; - gint begin_index = matches[2 * i]; - gint end_index = matches[2 * i + 1]; - if (begin_index < 0 || end_index < 0) - continue; - - FilterXObject *value = filterx_string_new(state->lhs_str + begin_index, end_index - begin_index); - gboolean success = filterx_list_append(fillable, 
&value); - filterx_object_unref(value); - - if (!success) - { - msg_error("FilterX: Failed to append regexp match to list", evt_tag_int("index", i)); - return FALSE; - } - } - - return TRUE; -} - -static gboolean -_store_matches_to_dict(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) -{ - PCRE2_SIZE *matches = pcre2_get_ovector_pointer(state->match_data); - guint32 num_matches = pcre2_get_ovector_count(state->match_data); - gchar num_str_buf[G_ASCII_DTOSTR_BUF_SIZE]; - - /* First store all matches with string formatted indexes as keys. */ - for (guint32 i = 0; i < num_matches; i++) - { - if (num_matches > 1 && i==0 && !check_flag(state->flags, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)) - continue; - - PCRE2_SIZE begin_index = matches[2 * i]; - PCRE2_SIZE end_index = matches[2 * i + 1]; - if (begin_index < 0 || end_index < 0) - continue; - - g_snprintf(num_str_buf, sizeof(num_str_buf), "%" G_GUINT32_FORMAT, i); - FilterXObject *key = filterx_string_new(num_str_buf, -1); - FilterXObject *value = filterx_string_new(state->lhs_str + begin_index, end_index - begin_index); - - gboolean success = filterx_object_set_subscript(fillable, key, &value); - - filterx_object_unref(key); - filterx_object_unref(value); - - if (!success) - { - msg_error("FilterX: Failed to add regexp match to dict", evt_tag_str("key", num_str_buf)); - return FALSE; - } - } - - gchar *name_table = NULL; - guint32 name_entry_size = 0; - guint32 namecount = 0; - pcre2_pattern_info(pattern, PCRE2_INFO_NAMETABLE, &name_table); - pcre2_pattern_info(pattern, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); - pcre2_pattern_info(pattern, PCRE2_INFO_NAMECOUNT, &namecount); - - /* Rename named matches. 
*/ - for (guint32 i = 0; i < namecount; i++, name_table += name_entry_size) - { - int n = (name_table[0] << 8) | name_table[1]; - PCRE2_SIZE begin_index = matches[2 * n]; - PCRE2_SIZE end_index = matches[2 * n + 1]; - const gchar *namedgroup_name = name_table + 2; - - if (begin_index < 0 || end_index < 0) - continue; - - g_snprintf(num_str_buf, sizeof(num_str_buf), "%" G_GUINT32_FORMAT, n); - FilterXObject *num_key = filterx_string_new(num_str_buf, -1); - FilterXObject *key = filterx_string_new(namedgroup_name, -1); - FilterXObject *value = filterx_object_get_subscript(fillable, num_key); - - gboolean success = filterx_object_set_subscript(fillable, key, &value); - g_assert(filterx_object_unset_key(fillable, num_key)); - - filterx_object_unref(key); - filterx_object_unref(num_key); - filterx_object_unref(value); - - if (!success) - { - msg_error("FilterX: Failed to add regexp match to dict", evt_tag_str("key", namedgroup_name)); - return FALSE; - } - } - - return TRUE; -} - -static gboolean -_store_matches(pcre2_code_8 *pattern, const FilterXReMatchState *state, FilterXObject *fillable) -{ - fillable = filterx_ref_unwrap_rw(fillable); - - if (filterx_object_is_type(fillable, &FILTERX_TYPE_NAME(list))) - return _store_matches_to_list(pattern, state, fillable); - - if (filterx_object_is_type(fillable, &FILTERX_TYPE_NAME(dict))) - return _store_matches_to_dict(pattern, state, fillable); - - msg_error("FilterX: Failed to store regexp match data, invalid fillable type", - evt_tag_str("type", fillable->type->name)); - return FALSE; -} - +#include "filterx/expr-regexp-common.h" typedef struct FilterXExprRegexpMatch_ { @@ -307,9 +49,9 @@ _regexp_match_eval(FilterXExpr *s) FilterXObject *result = NULL; FilterXReMatchState state; - _state_init(&state); + filterx_expr_rematch_state_init(&state); - gboolean matched = _match(self->lhs, self->pattern, &state); + gboolean matched = filterx_regexp_match_eval(self->lhs, self->pattern, &state); if (!state.match_data) { /* Error 
happened during matching. */ @@ -319,7 +61,7 @@ _regexp_match_eval(FilterXExpr *s) result = filterx_boolean_new(matched != self->invert); exit: - _state_cleanup(&state); + filterx_expr_rematch_state_cleanup(&state); return result; } @@ -367,7 +109,7 @@ filterx_expr_regexp_match_new(FilterXExpr *lhs, const gchar *pattern) self->super.free_fn = _regexp_match_free; self->lhs = lhs; - self->pattern = _compile_pattern_defaults(pattern); + self->pattern = filterx_regexp_compile_pattern_defaults(pattern); if (!self->pattern) { filterx_expr_unref(&self->super); @@ -384,502 +126,3 @@ filterx_expr_regexp_nomatch_new(FilterXExpr *lhs, const gchar *pattern) self->invert = TRUE; return &self->super; } - -typedef struct FilterXExprRegexpSearchGenerator_ -{ - FilterXGeneratorFunction super; - FilterXExpr *lhs; - pcre2_code_8 *pattern; - FLAGSET flags; -} FilterXExprRegexpSearchGenerator; - -static gboolean -_regexp_search_generator_generate(FilterXExprGenerator *s, FilterXObject *fillable) -{ - FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; - - gboolean result; - FilterXReMatchState state; - _state_init(&state); - state.flags = self->flags; - - gboolean matched = _match(self->lhs, self->pattern, &state); - if (!matched) - { - result = TRUE; - goto exit; - } - - if (!state.match_data) - { - /* Error happened during matching. 
*/ - result = FALSE; - goto exit; - } - - result = _store_matches(self->pattern, &state, fillable); - -exit: - _state_cleanup(&state); - return result; -} - -static FilterXObject * -_regexp_search_generator_create_container(FilterXExprGenerator *s, FilterXExpr *fillable_parent) -{ - FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; - - if (check_flag(self->flags, FILTERX_REGEXP_SEARCH_LIST_MODE)) - return filterx_generator_create_list_container(s, fillable_parent); - - return filterx_generator_create_dict_container(s, fillable_parent); -} - -static gboolean -_regexp_search_generator_init(FilterXExpr *s, GlobalConfig *cfg) -{ - FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; - - if (!filterx_expr_init(self->lhs, cfg)) - return FALSE; - - return filterx_generator_init_method(s, cfg); -} - -static void -_regexp_search_generator_deinit(FilterXExpr *s, GlobalConfig *cfg) -{ - FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; - - filterx_expr_deinit(self->lhs, cfg); - filterx_generator_deinit_method(s, cfg); -} - -static void -_regexp_search_generator_free(FilterXExpr *s) -{ - FilterXExprRegexpSearchGenerator *self = (FilterXExprRegexpSearchGenerator *) s; - - filterx_expr_unref(self->lhs); - if (self->pattern) - pcre2_code_free(self->pattern); - filterx_generator_function_free_method(&self->super); -} - -static gboolean -_extract_optional_arg_flag(FilterXExprRegexpSearchGenerator *self, FilterXRegexpSearchFlags flag, - FilterXFunctionArgs *args, GError **error) -{ - gboolean exists, eval_error; - g_assert(flag < FilterXRegexpSearchFlags_MAX); - const gchar *arg_name = FilterXRegexpSearchFlags_NAMES[flag]; - gboolean value = filterx_function_args_get_named_literal_boolean(args, arg_name, &exists, &eval_error); - if (!exists) - return TRUE; - - if (eval_error) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "%s argument must be boolean 
literal. " FILTERX_FUNC_REGEXP_SEARCH_USAGE, arg_name); - return FALSE; - } - - set_flag(&self->flags, flag, value); - - return TRUE; -} - -static gboolean -_extract_search_args(FilterXExprRegexpSearchGenerator *self, FilterXFunctionArgs *args, GError **error) -{ - if (filterx_function_args_len(args) != 2) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "invalid number of arguments. " FILTERX_FUNC_REGEXP_SEARCH_USAGE); - return FALSE; - } - - self->lhs = filterx_function_args_get_expr(args, 0); - - const gchar *pattern = filterx_function_args_get_literal_string(args, 1, NULL); - if (!pattern) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "pattern must be string literal. " FILTERX_FUNC_REGEXP_SEARCH_USAGE); - return FALSE; - } - - self->pattern = _compile_pattern_defaults(pattern); - if (!self->pattern) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "failed to compile pattern. 
" FILTERX_FUNC_REGEXP_SEARCH_USAGE); - return FALSE; - } - - return TRUE; - -} - -/* Takes reference of lhs */ -FilterXExpr * -filterx_generator_function_regexp_search_new(FilterXFunctionArgs *args, GError **error) -{ - FilterXExprRegexpSearchGenerator *self = g_new0(FilterXExprRegexpSearchGenerator, 1); - - filterx_generator_function_init_instance(&self->super, "regexp_search"); - self->super.super.generate = _regexp_search_generator_generate; - self->super.super.super.init = _regexp_search_generator_init; - self->super.super.super.deinit = _regexp_search_generator_deinit; - self->super.super.super.free_fn = _regexp_search_generator_free; - self->super.super.create_container = _regexp_search_generator_create_container; - - if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO, args, error)) - goto error; - - if (!_extract_optional_arg_flag(self, FILTERX_REGEXP_SEARCH_LIST_MODE, args, error)) - goto error; - - if (!_extract_search_args(self, args, error) || - !filterx_function_args_check(args, error)) - goto error; - - filterx_function_args_free(args); - return &self->super.super.super; - -error: - filterx_function_args_free(args); - filterx_expr_unref(&self->super.super.super); - return NULL; -} - - -typedef struct FilterXFuncRegexpSubst_ -{ - FilterXFunction super; - FilterXExpr *string_expr; - pcre2_code_8 *pattern; - gchar *replacement; - FilterXFuncRegexpSubstOpts opts; -} FilterXFuncRegexpSubst; - - -static inline gint -_start_offset(PCRE2_SIZE *ovector) -{ - return ovector[0]; -} - -static inline gint -_end_offset(PCRE2_SIZE *ovector) -{ - return ovector[1]; -} - -static inline gboolean -_is_zero_length_match(PCRE2_SIZE *ovector) -{ - return ovector[0] == ovector[1]; -} - -static gboolean -_build_replacement_stirng_with_match_groups(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state, - GString *replacement_string) -{ - PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(state->match_data); - g_string_set_size(replacement_string, 
0); - const gchar *rep_ptr = self->replacement; - const gchar *last_ptr = rep_ptr; - gint num_grps = state->rc; - - while (*rep_ptr) - { - if (*rep_ptr == '\\') - { - rep_ptr++; - if (*rep_ptr >= '1' && *rep_ptr <= '9') - { - gint grp_idx = *rep_ptr - '0'; - if (grp_idx < num_grps) - { - PCRE2_SIZE start = ovector[2 * grp_idx]; - PCRE2_SIZE end = ovector[2 * grp_idx + 1]; - if (start != PCRE2_UNSET) - { - g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr - 1); - last_ptr = rep_ptr + 1; - size_t group_len = end - start; - g_string_append_len(replacement_string, state->lhs_str + start, group_len); - } - } - } - rep_ptr++; - } - else - rep_ptr++; - } - g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr); - return TRUE; -} - -static FilterXObject * -_replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state) -{ - GString *new_value = scratch_buffers_alloc(); - PCRE2_SIZE *ovector = NULL; - gint pos = 0; - const gchar *replacement_string = self->replacement; - - if (self->opts.groups) - { - GString *rep_str = scratch_buffers_alloc(); - _build_replacement_stirng_with_match_groups(self, state, rep_str); - replacement_string = rep_str->str; - } - - do - { - ovector = pcre2_get_ovector_pointer(state->match_data); - - g_string_append_len(new_value, state->lhs_str + pos, _start_offset(ovector) - pos); - g_string_append(new_value, replacement_string); - - if (_is_zero_length_match(ovector)) - { - g_string_append_len(new_value, state->lhs_str + pos, 1); - pos++; - } - else - pos = _end_offset(ovector); - - if (!_match_inner(state, self->pattern, pos)) - break; - } - while ((pos < state->lhs_str_len) && self->opts.global); - - // add the rest of the string - g_string_append_len(new_value, state->lhs_str + pos, state->lhs_str_len - pos); - - // handle the very last of zero lenght matches - if (_is_zero_length_match(ovector)) - g_string_append(new_value, replacement_string); - - return filterx_string_new(new_value->str, 
new_value->len); -} - -static FilterXObject * -_subst_eval(FilterXExpr *s) -{ - FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; - - FilterXObject *result = NULL; - FilterXReMatchState state; - _state_init(&state); - - gboolean matched = _match(self->string_expr, self->pattern, &state); - if (!matched) - { - result = filterx_object_ref(state.lhs_obj); - goto exit; - } - - if (!state.match_data) - { - /* Error happened during matching. */ - result = NULL; - goto exit; - } - - result = _replace_matches(self, &state); - -exit: - _state_cleanup(&state); - return result; -} - -static FilterXExpr * -_extract_subst_string_expr_arg(FilterXFunctionArgs *args, GError **error) -{ - return filterx_function_args_get_expr(args, 0); -} - -static gint -_create_compile_opts(FilterXFuncRegexpSubstOpts opts) -{ - gint res = 0; - res ^= (-opts.utf8 ^ res) & PCRE2_NO_UTF_CHECK; - res ^= (-opts.ignorecase ^ res) & PCRE2_CASELESS; - res ^= (-opts.newline ^ res) & PCRE2_NEWLINE_ANYCRLF; - return res; -} - -static pcre2_code_8 * -_extract_subst_pattern_arg(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) -{ - const gchar *pattern = filterx_function_args_get_literal_string(args, 1, NULL); - if (!pattern) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "argument must be a string literal: pattern. " FILTERX_FUNC_REGEXP_SUBST_USAGE); - return NULL; - } - - return _compile_pattern(pattern, self->opts.jit, _create_compile_opts(self->opts)); -} - -static gchar * -_extract_subst_replacement_arg(FilterXFunctionArgs *args, GError **error) -{ - const gchar *replacement = filterx_function_args_get_literal_string(args, 2, NULL); - if (!replacement) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "argument must be a string literal: replacement. 
" FILTERX_FUNC_REGEXP_SUBST_USAGE); - return NULL; - } - - return g_strdup(replacement); -} - -static gboolean -_extract_literal_bool(FilterXFunctionArgs *args, const gchar *option_name, gboolean *value, GError **error) -{ - gboolean exists, eval_error; - gboolean val = filterx_function_args_get_named_literal_boolean(args, option_name, &exists, &eval_error); - if (exists) - { - if (eval_error) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "%s argument must be boolean literal. ", option_name); - return FALSE; - }; - *value = val; - } - return TRUE; -} - -static gboolean -_extract_optional_flags(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) -{ - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME, - &self->opts.global, error)) - return FALSE; - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME, &self->opts.jit, - error)) - return FALSE; - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME, - &self->opts.ignorecase, error)) - return FALSE; - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME, - &self->opts.newline, error)) - return FALSE; - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, &self->opts.utf8, - error)) - return FALSE; - if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME, - &self->opts.groups, error)) - return FALSE; - return TRUE; -} - -static gboolean -_extract_subst_args(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args, GError **error) -{ - if (filterx_function_args_len(args) != 3) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "invalid number of arguments. 
" FILTERX_FUNC_REGEXP_SUBST_USAGE); - return FALSE; - } - - self->string_expr = _extract_subst_string_expr_arg(args, error); - if (!self->string_expr) - return FALSE; - - if (!_extract_optional_flags(self, args, error)) - return FALSE; - - self->pattern = _extract_subst_pattern_arg(self, args, error); - if (!self->pattern) - return FALSE; - - self->replacement = _extract_subst_replacement_arg(args, error); - if (!self->replacement) - return FALSE; - - - return TRUE; -} - -static gboolean -_subst_init(FilterXExpr *s, GlobalConfig *cfg) -{ - FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; - - if (!filterx_expr_init(self->string_expr, cfg)) - return FALSE; - - return filterx_function_init_method(&self->super, cfg); -} - -static void -_subst_deinit(FilterXExpr *s, GlobalConfig *cfg) -{ - FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; - filterx_expr_deinit(self->string_expr, cfg); - filterx_function_deinit_method(&self->super, cfg); -} - -static void -_subst_free(FilterXExpr *s) -{ - FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *) s; - filterx_expr_unref(self->string_expr); - if (self->pattern) - pcre2_code_free(self->pattern); - g_free(self->replacement); - filterx_function_free_method(&self->super); -} - -static void -_opts_init(FilterXFuncRegexpSubstOpts *opts) -{ - memset(opts, 0, sizeof(FilterXFuncRegexpSubstOpts)); - opts->jit = TRUE; -} - -FilterXExpr * -filterx_function_regexp_subst_new(FilterXFunctionArgs *args, GError **error) -{ - FilterXFuncRegexpSubst *self = g_new0(FilterXFuncRegexpSubst, 1); - filterx_function_init_instance(&self->super, "regexp_subst"); - self->super.super.eval = _subst_eval; - self->super.super.init = _subst_init; - self->super.super.deinit = _subst_deinit; - self->super.super.free_fn = _subst_free; - - _opts_init(&self->opts); - - if (!_extract_subst_args(self, args, error) || - !filterx_function_args_check(args, error)) - goto error; - - filterx_function_args_free(args); - return 
&self->super.super; - -error: - filterx_function_args_free(args); - filterx_expr_unref(&self->super.super); - return NULL; -} - -gboolean -filterx_regexp_subst_is_jit_enabled(FilterXExpr *s) -{ - g_assert(s); - FilterXFuncRegexpSubst *self = (FilterXFuncRegexpSubst *)s; - PCRE2_SIZE jit_size; - int info_result = pcre2_pattern_info(self->pattern, PCRE2_INFO_JITSIZE, &jit_size); - return info_result == 0 && jit_size > 0; -} diff --git a/lib/filterx/expr-regexp.h b/lib/filterx/expr-regexp.h index 3a9475411..d56105984 100644 --- a/lib/filterx/expr-regexp.h +++ b/lib/filterx/expr-regexp.h @@ -29,37 +29,7 @@ #include "filterx/expr-function.h" #include "filterx/func-flags.h" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME "jit" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME "global" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME "utf8" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME "ignorecase" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME "newline" -#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME "groups" - -DEFINE_FUNC_FLAGS(FilterXRegexpSearchFlags, - FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO, - FILTERX_REGEXP_SEARCH_LIST_MODE - ); - -#define FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO_NAME "keep_zero" -#define FILTERX_REGEXP_SEARCH_LIST_MODE_NAME "list_mode" - -extern const char *FilterXRegexpSearchFlags_NAMES[]; - -typedef struct FilterXFuncRegexpSubstOpts_ -{ - gboolean global; - gboolean jit; - gboolean utf8; - gboolean ignorecase; - gboolean newline; - gboolean groups; -} FilterXFuncRegexpSubstOpts; - FilterXExpr *filterx_expr_regexp_match_new(FilterXExpr *lhs, const gchar *pattern); FilterXExpr *filterx_expr_regexp_nomatch_new(FilterXExpr *lhs, const gchar *pattern); -FilterXExpr *filterx_generator_function_regexp_search_new(FilterXFunctionArgs *args, GError **error); -FilterXExpr *filterx_function_regexp_subst_new(FilterXFunctionArgs *args, GError **error); -gboolean filterx_regexp_subst_is_jit_enabled(FilterXExpr *s); #endif diff --git 
a/lib/filterx/filterx-globals.c b/lib/filterx/filterx-globals.c index 5309cdab2..67c977128 100644 --- a/lib/filterx/filterx-globals.c +++ b/lib/filterx/filterx-globals.c @@ -42,6 +42,8 @@ #include "filterx/func-str-transform.h" #include "filterx/func-flatten.h" #include "filterx/func-sdata.h" +#include "filterx/expr-regexp-search.h" +#include "filterx/expr-regexp-subst.h" #include "filterx/expr-regexp.h" #include "filterx/expr-unset.h" #include "filterx/filterx-eval.h" diff --git a/lib/filterx/tests/CMakeLists.txt b/lib/filterx/tests/CMakeLists.txt index 67a5febcf..71bd70ca2 100644 --- a/lib/filterx/tests/CMakeLists.txt +++ b/lib/filterx/tests/CMakeLists.txt @@ -24,3 +24,5 @@ add_unit_test(LIBTEST CRITERION TARGET test_expr_null_coalesce DEPENDS json-plug add_unit_test(LIBTEST CRITERION TARGET test_expr_plus DEPENDS json-plugin ${JSONC_LIBRARY}) add_unit_test(LIBTEST CRITERION TARGET test_expr_plus_generator DEPENDS json-plugin ${JSONC_LIBRARY}) add_unit_test(LIBTEST CRITERION TARGET test_metrics_labels DEPENDS json-plugin ${JSONC_LIBRARY}) +add_unit_test(LIBTEST CRITERION TARGET test_expr_regexp_search DEPENDS json-plugin ${JSONC_LIBRARY}) +add_unit_test(LIBTEST CRITERION TARGET test_expr_regexp_subst DEPENDS json-plugin ${JSONC_LIBRARY}) diff --git a/lib/filterx/tests/Makefile.am b/lib/filterx/tests/Makefile.am index 43a46ac49..67757e996 100644 --- a/lib/filterx/tests/Makefile.am +++ b/lib/filterx/tests/Makefile.am @@ -21,6 +21,8 @@ lib_filterx_tests_TESTS = \ lib/filterx/tests/test_func_istype \ lib/filterx/tests/test_func_unset_empties \ lib/filterx/tests/test_func_flatten \ + lib/filterx/tests/test_expr_regexp_search \ + lib/filterx/tests/test_expr_regexp_subst \ lib/filterx/tests/test_expr_regexp \ lib/filterx/tests/test_expr_null_coalesce \ lib/filterx/tests/test_expr_plus \ @@ -95,6 +97,12 @@ lib_filterx_tests_test_func_flatten_LDADD = $(TEST_LDADD) $(JSON_LIBS) lib_filterx_tests_test_expr_function_CFLAGS = $(TEST_CFLAGS) 
lib_filterx_tests_test_expr_function_LDADD = $(TEST_LDADD) $(JSON_LIBS) +lib_filterx_tests_test_expr_regexp_search_CFLAGS = $(TEST_CFLAGS) +lib_filterx_tests_test_expr_regexp_search_LDADD = $(TEST_LDADD) $(JSON_LIBS) + +lib_filterx_tests_test_expr_regexp_subst_CFLAGS = $(TEST_CFLAGS) +lib_filterx_tests_test_expr_regexp_subst_LDADD = $(TEST_LDADD) $(JSON_LIBS) + lib_filterx_tests_test_expr_regexp_CFLAGS = $(TEST_CFLAGS) lib_filterx_tests_test_expr_regexp_LDADD = $(TEST_LDADD) $(JSON_LIBS) diff --git a/lib/filterx/tests/test_expr_regexp.c b/lib/filterx/tests/test_expr_regexp.c index dfc943ac0..29fae9c51 100644 --- a/lib/filterx/tests/test_expr_regexp.c +++ b/lib/filterx/tests/test_expr_regexp.c @@ -78,577 +78,6 @@ Test(filterx_expr_regexp, regexp_match) _assert_match_init_error("abc", "("); } -static void -_parse_search_flags(GList *args, FLAGSET flags) -{ - FUNC_FLAGS_ITER(FilterXRegexpSearchFlags, - { - if (check_flag(flags, enum_elt)) - { - const gchar *flag_name = FilterXRegexpSearchFlags_NAMES[enum_elt]; - args = g_list_append(args, filterx_function_arg_new(flag_name, filterx_literal_new(filterx_boolean_new(TRUE)))); - } - }) -} - -static FilterXObject * -_search(const gchar *lhs, const gchar *pattern, FLAGSET flags) -{ - GList *args = NULL; - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); - _parse_search_flags(args, flags); - - FilterXExpr *expr = filterx_generator_function_regexp_search_new(filterx_function_args_new(args, NULL), NULL); - FilterXExpr *parent_fillable_expr_new = filterx_literal_new(filterx_test_dict_new()); - FilterXExpr *cc_expr = filterx_generator_create_container_new(expr, parent_fillable_expr_new); - FilterXExpr *fillable_expr = filterx_literal_new(filterx_expr_eval(cc_expr)); - filterx_generator_set_fillable(expr, fillable_expr); - - FilterXObject *result_obj = 
filterx_expr_eval(expr); - cr_assert(result_obj); - cr_assert(filterx_object_truthy(result_obj)); - - FilterXObject *fillable = filterx_expr_eval(fillable_expr); - cr_assert(fillable); - - filterx_object_unref(result_obj); - filterx_expr_unref(cc_expr); - - return fillable; -} - -static void -_search_with_fillable(const gchar *lhs, const gchar *pattern, FilterXObject *fillable, FLAGSET flags) -{ - GList *args = NULL; - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); - _parse_search_flags(args, flags); - - FilterXExpr *expr = filterx_generator_function_regexp_search_new(filterx_function_args_new(args, NULL), NULL); - filterx_generator_set_fillable(expr, filterx_literal_new(filterx_object_ref(fillable))); - - FilterXObject *result_obj = filterx_expr_eval(expr); - cr_assert(result_obj); - cr_assert(filterx_object_truthy(result_obj)); - - filterx_object_unref(result_obj); - filterx_expr_unref(expr); -} - -static void -_assert_search_init_error(const gchar *lhs, const gchar *pattern) -{ - GList *args = NULL; - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); - - GError *arg_err = NULL; - GError *func_err = NULL; - cr_assert_not(filterx_generator_function_regexp_search_new(filterx_function_args_new(args, &arg_err), &func_err)); - - cr_assert(arg_err || func_err); - g_clear_error(&arg_err); - g_clear_error(&func_err); -} - -static void -_assert_len(FilterXObject *obj, guint64 expected_len) -{ - guint64 len; - cr_assert(filterx_object_len(obj, &len)); - cr_assert_eq(len, expected_len, "len mismatch. 
expected: %" G_GUINT64_FORMAT " actual: %" G_GUINT64_FORMAT, - expected_len, len); -} - -static void -_assert_list_elem(FilterXObject *list, gint64 index, const gchar *expected_value) -{ - FilterXObject *elem = filterx_list_get_subscript(list, index); - cr_assert(elem); - - const gchar *value = filterx_string_get_value_ref(elem, NULL); - cr_assert_str_eq(value, expected_value); - - filterx_object_unref(elem); -} - -static void -_assert_dict_elem(FilterXObject *list, const gchar *key, const gchar *expected_value) -{ - FilterXObject *key_obj = filterx_string_new(key, -1); - FilterXObject *elem = filterx_object_get_subscript(list, key_obj); - cr_assert(elem); - - const gchar *value = filterx_string_get_value_ref(elem, NULL); - cr_assert_str_eq(value, expected_value); - - filterx_object_unref(key_obj); - filterx_object_unref(elem); -} - -Test(filterx_expr_regexp, regexp_search_unnamed) -{ - FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", 0); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 3); - _assert_dict_elem(result, "1", "foo"); - _assert_dict_elem(result, "2", "bar"); - _assert_dict_elem(result, "3", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_unnamed_grp_zero) -{ - FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 4); - _assert_dict_elem(result, "0", "foobarbaz"); - _assert_dict_elem(result, "1", "foo"); - _assert_dict_elem(result, "2", "bar"); - _assert_dict_elem(result, "3", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_unnamed_grp_zero_list_mode) -{ - FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", - FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO) | FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); - cr_assert(filterx_object_is_type(result, 
&FILTERX_TYPE_NAME(list))); - _assert_len(result, 4); - _assert_list_elem(result, 0, "foobarbaz"); - _assert_list_elem(result, 1, "foo"); - _assert_list_elem(result, 2, "bar"); - _assert_list_elem(result, 3, "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_named) -{ - FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", 0); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 3); - _assert_dict_elem(result, "first", "foo"); - _assert_dict_elem(result, "second", "bar"); - _assert_dict_elem(result, "third", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_named_grp_zero) -{ - FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", - FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 4); - _assert_dict_elem(result, "0", "foobarbaz"); - _assert_dict_elem(result, "first", "foo"); - _assert_dict_elem(result, "second", "bar"); - _assert_dict_elem(result, "third", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_named_grp_zero_list_mode) -{ - FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", - FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO) | FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(list))); - _assert_len(result, 4); - _assert_list_elem(result, 0, "foobarbaz"); - _assert_list_elem(result, 1, "foo"); - _assert_list_elem(result, 2, "bar"); - _assert_list_elem(result, 3, "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_mixed) -{ - FilterXObject *result = _search("foobarbaz", "(?foo)(bar)(?baz)", 0); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 3); - _assert_dict_elem(result, "first", "foo"); - _assert_dict_elem(result, "2", "bar"); - 
_assert_dict_elem(result, "third", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_forced_list) -{ - FilterXObject *result = filterx_test_list_new(); - _search_with_fillable("foobarbaz", "(?foo)(bar)(?baz)", result, 0); - _assert_len(result, 3); - _assert_list_elem(result, 0, "foo"); - _assert_list_elem(result, 1, "bar"); - _assert_list_elem(result, 2, "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_forced_dict) -{ - FilterXObject *result = filterx_test_dict_new(); - _search_with_fillable("foobarbaz", "(foo)(bar)(baz)", result, 0); - _assert_len(result, 3); - _assert_dict_elem(result, "1", "foo"); - _assert_dict_elem(result, "2", "bar"); - _assert_dict_elem(result, "3", "baz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_forced_dict_list_mode) -{ - // list mode overrides the default dict container creation, but still returns dict when fillable type is forced - FilterXObject *result = filterx_test_dict_new(); - _search_with_fillable("foobarbaz", "(foo)(bar)(baz)", result, FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); - _assert_len(result, 3); - _assert_dict_elem(result, "1", "foo"); - _assert_dict_elem(result, "2", "bar"); - _assert_dict_elem(result, "3", "baz"); - filterx_object_unref(result); -} - - -Test(filterx_expr_regexp, regexp_search_unnamed_no_match) -{ - FilterXObject *result = _search("foobarbaz", "(almafa)", 0); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 0); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_named_no_match) -{ - FilterXObject *result = _search("foobarbaz", "(?almafa)", 0); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 0); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_retain_group_zero_if_sole) -{ - FilterXObject *result = _search("foobarbaz", "foobarbaz", 0); - 
cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); - _assert_len(result, 1); - _assert_dict_elem(result, "0", "foobarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_retain_group_zero_if_sole_list_mode) -{ - FilterXObject *result = _search("foobarbaz", "foobarbaz", FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(list))); - _assert_len(result, 1); - _assert_list_elem(result, 0, "foobarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_search_init_error) -{ - _assert_search_init_error("foobarbaz", "("); -} - -static FilterXExpr * -_build_subst_func(const gchar *pattern, const gchar *repr, const gchar *str, FilterXFuncRegexpSubstOpts opts) -{ - GList *args = NULL; - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(str, -1)))); - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); - args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(repr, -1)))); - if (opts.global) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME, - filterx_literal_new(filterx_boolean_new(TRUE)))); - if (!opts.jit) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME, - filterx_literal_new(filterx_boolean_new(FALSE)))); - if (opts.ignorecase) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME, - filterx_literal_new(filterx_boolean_new(TRUE)))); - if (opts.newline) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME, - filterx_literal_new(filterx_boolean_new(TRUE)))); - if (opts.utf8) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, - 
filterx_literal_new(filterx_boolean_new(TRUE)))); - if (opts.groups) - args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME, - filterx_literal_new(filterx_boolean_new(TRUE)))); - - GError *err = NULL; - FilterXExpr *func = filterx_function_regexp_subst_new(filterx_function_args_new(args, NULL), &err); - cr_assert_null(err); - return func; -} - -static FilterXObject * -_sub(const gchar *pattern, const gchar *repr, const gchar *str, FilterXFuncRegexpSubstOpts opts) -{ - FilterXExpr *func = _build_subst_func(pattern, repr, str, opts); - - FilterXObject *res = filterx_expr_eval(func); - filterx_expr_unref(func); - return res; -} - -// disabling jit compiler since it confuses valgrind in some cases -// in some test cases we test jit against non-jit, those tests will produce invalid reads in valgrind -// further info: https://stackoverflow.com/questions/74777619/valgrind-conditional-jump-error-with-pcre2-jit-when-reading-from-file - -Test(filterx_expr_regexp, regexp_subst_single_replace) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("oo", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXbarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_single_replace_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("oo", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXbarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_multi_replace) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("a", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const 
gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobXrbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_multi_replace_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("a", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobXrbXz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_zero_length_matches) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("u*", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "XfoobarbazX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_zero_length_matches_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("u*", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "XfXoXoXbXaXrXbXaXzX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_zero_length_matches_with_char_matches) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("a*", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "XfoobarbazX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_zero_length_matches_with_char_matches_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE, .jit=FALSE}; - FilterXObject *result = _sub("a*", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, 
&FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "XfXoXoXbXXrXbXXzX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_at_beginning) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("fo", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "Xobarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_at_beginning_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("fo", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "Xobarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_at_the_end) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("az", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarbX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_at_the_end_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("az", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarbX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_multi_replace_multi_pattern) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("(a|o)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = 
filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXobarbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_multi_replace_multi_pattern_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("(a|o)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXXbXrbXz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_accept_end_literal) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("ba.$", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_accept_end_literal_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("ba.$", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarX"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_accept_groups) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("(o)*(ba)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXrbaz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_accept_groups_with_global) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("(o)*(ba)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = 
filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "fXrXz"); - filterx_object_unref(result); -} - -Test(filterx_expr_regexp, regexp_subst_nojit_arg) -{ - FilterXFuncRegexpSubstOpts opts = {.jit = TRUE}; - FilterXExpr *func = _build_subst_func("o", "X", "foobarbaz", opts); - cr_assert_not_null(func); - cr_assert(filterx_regexp_subst_is_jit_enabled(func)); - filterx_expr_unref(func); - - FilterXFuncRegexpSubstOpts opts_nojit = {}; - FilterXExpr *func_nojit = _build_subst_func("o", "X", "foobarbaz", opts_nojit); - cr_assert_not_null(func_nojit); - cr_assert(!filterx_regexp_subst_is_jit_enabled(func_nojit)); - filterx_expr_unref(func_nojit); -} - -Test(filterx_expr_regexp, regexp_subst_match_opt_ignorecase) -{ - FilterXFuncRegexpSubstOpts opts = {.global = TRUE}; - FilterXObject *result = _sub("(O|A)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarbaz"); - filterx_object_unref(result); - - FilterXFuncRegexpSubstOpts opts_alt = {.ignorecase = TRUE, .global = TRUE}; - FilterXObject *result_alt = _sub("(O|A)", "X", "foobarbaz", opts_alt); - cr_assert(filterx_object_is_type(result_alt, &FILTERX_TYPE_NAME(string))); - const gchar *res_alt = filterx_string_get_value_ref(result_alt, NULL); - cr_assert_str_eq(res_alt, "fXXbXrbXz"); - filterx_object_unref(result_alt); -} - -Test(filterx_expr_regexp, regexp_subst_match_opt_ignorecase_nojit) -{ - // check whether the CASELESS option applied with non-jit pattern - FilterXFuncRegexpSubstOpts opts = {.global=TRUE}; - FilterXObject *result = _sub("(O|A)", "X", "foobarbaz", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "foobarbaz"); - filterx_object_unref(result); - - FilterXFuncRegexpSubstOpts opts_alt = {.ignorecase = TRUE, .global = TRUE, .jit = 
TRUE}; - FilterXObject *result_alt = _sub("(O|A)", "X", "foobarbaz", opts_alt); - cr_assert(filterx_object_is_type(result_alt, &FILTERX_TYPE_NAME(string))); - const gchar *res_alt = filterx_string_get_value_ref(result_alt, NULL); - cr_assert_str_eq(res_alt, "fXXbXrbXz"); - filterx_object_unref(result_alt); -} - -Test(filterx_expr_regexp, regexp_subst_group_subst) -{ - FilterXFuncRegexpSubstOpts opts = {}; - FilterXObject *result = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "\\3-\\2-\\1"); - filterx_object_unref(result); - - FilterXFuncRegexpSubstOpts opts_alt = {.groups = TRUE}; - FilterXObject *result_alt = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts_alt); - cr_assert(filterx_object_is_type(result_alt, &FILTERX_TYPE_NAME(string))); - const gchar *res_alt = filterx_string_get_value_ref(result_alt, NULL); - cr_assert_str_eq(res_alt, "2022-02-25"); - filterx_object_unref(result_alt); -} - -Test(filterx_expr_regexp, regexp_subst_group_subst_without_ref) -{ - FilterXFuncRegexpSubstOpts opts = {.groups = TRUE}; - FilterXObject *result = _sub("(\\d{2})-(\\d{2})-(\\d{4})", "group without ref", "25-02-2022", opts); - cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string))); - const gchar *res = filterx_string_get_value_ref(result, NULL); - cr_assert_str_eq(res, "group without ref"); - filterx_object_unref(result); -} - static void setup(void) { diff --git a/lib/filterx/tests/test_expr_regexp_search.c b/lib/filterx/tests/test_expr_regexp_search.c new file mode 100644 index 000000000..cb45696f4 --- /dev/null +++ b/lib/filterx/tests/test_expr_regexp_search.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#include +#include "libtest/filterx-lib.h" + +#include "filterx/expr-regexp-search.h" +#include "filterx/expr-literal.h" +#include "filterx/object-string.h" +#include "filterx/object-primitive.h" +#include "filterx/object-dict-interface.h" +#include "filterx/object-list-interface.h" +#include "filterx/filterx-object-istype.h" +#include "apphook.h" +#include "scratch-buffers.h" +#include "compat/pcre.h" + +static void +_parse_search_flags(GList *args, FLAGSET flags) +{ + FUNC_FLAGS_ITER(FilterXRegexpSearchFlags, + { + if (check_flag(flags, enum_elt)) + { + const gchar *flag_name = FilterXRegexpSearchFlags_NAMES[enum_elt]; + args = g_list_append(args, filterx_function_arg_new(flag_name, filterx_literal_new(filterx_boolean_new(TRUE)))); + } + }) +} + +static FilterXObject * +_search(const gchar *lhs, const gchar *pattern, FLAGSET flags) +{ + GList *args = NULL; + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); + _parse_search_flags(args, flags); + + FilterXExpr *expr = 
filterx_generator_function_regexp_search_new(filterx_function_args_new(args, NULL), NULL); + FilterXExpr *parent_fillable_expr_new = filterx_literal_new(filterx_test_dict_new()); + FilterXExpr *cc_expr = filterx_generator_create_container_new(expr, parent_fillable_expr_new); + FilterXExpr *fillable_expr = filterx_literal_new(filterx_expr_eval(cc_expr)); + filterx_generator_set_fillable(expr, fillable_expr); + + FilterXObject *result_obj = filterx_expr_eval(expr); + cr_assert(result_obj); + cr_assert(filterx_object_truthy(result_obj)); + + FilterXObject *fillable = filterx_expr_eval(fillable_expr); + cr_assert(fillable); + + filterx_object_unref(result_obj); + filterx_expr_unref(cc_expr); + + return fillable; +} + +static void +_search_with_fillable(const gchar *lhs, const gchar *pattern, FilterXObject *fillable, FLAGSET flags) +{ + GList *args = NULL; + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); + _parse_search_flags(args, flags); + + FilterXExpr *expr = filterx_generator_function_regexp_search_new(filterx_function_args_new(args, NULL), NULL); + filterx_generator_set_fillable(expr, filterx_literal_new(filterx_object_ref(fillable))); + + FilterXObject *result_obj = filterx_expr_eval(expr); + cr_assert(result_obj); + cr_assert(filterx_object_truthy(result_obj)); + + filterx_object_unref(result_obj); + filterx_expr_unref(expr); +} + +static void +_assert_search_init_error(const gchar *lhs, const gchar *pattern) +{ + GList *args = NULL; + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(lhs, -1)))); + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1)))); + + GError *arg_err = NULL; + GError *func_err = NULL; + 
cr_assert_not(filterx_generator_function_regexp_search_new(filterx_function_args_new(args, &arg_err), &func_err)); + + cr_assert(arg_err || func_err); + g_clear_error(&arg_err); + g_clear_error(&func_err); +} + +static void +_assert_len(FilterXObject *obj, guint64 expected_len) +{ + guint64 len; + cr_assert(filterx_object_len(obj, &len)); + cr_assert_eq(len, expected_len, "len mismatch. expected: %" G_GUINT64_FORMAT " actual: %" G_GUINT64_FORMAT, + expected_len, len); +} + +static void +_assert_list_elem(FilterXObject *list, gint64 index, const gchar *expected_value) +{ + FilterXObject *elem = filterx_list_get_subscript(list, index); + cr_assert(elem); + + const gchar *value = filterx_string_get_value_ref(elem, NULL); + cr_assert_str_eq(value, expected_value); + + filterx_object_unref(elem); +} + +static void +_assert_dict_elem(FilterXObject *list, const gchar *key, const gchar *expected_value) +{ + FilterXObject *key_obj = filterx_string_new(key, -1); + FilterXObject *elem = filterx_object_get_subscript(list, key_obj); + cr_assert(elem); + + const gchar *value = filterx_string_get_value_ref(elem, NULL); + cr_assert_str_eq(value, expected_value); + + filterx_object_unref(key_obj); + filterx_object_unref(elem); +} + +Test(filterx_expr_regexp_search, unnamed) +{ + FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", 0); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 3); + _assert_dict_elem(result, "1", "foo"); + _assert_dict_elem(result, "2", "bar"); + _assert_dict_elem(result, "3", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, unnamed_grp_zero) +{ + FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 4); + _assert_dict_elem(result, "0", "foobarbaz"); + _assert_dict_elem(result, "1", "foo"); + _assert_dict_elem(result, "2", 
"bar"); + _assert_dict_elem(result, "3", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, unnamed_grp_zero_list_mode) +{ + FilterXObject *result = _search("foobarbaz", "(foo)(bar)(baz)", + FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO) | FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(list))); + _assert_len(result, 4); + _assert_list_elem(result, 0, "foobarbaz"); + _assert_list_elem(result, 1, "foo"); + _assert_list_elem(result, 2, "bar"); + _assert_list_elem(result, 3, "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, named) +{ + FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", 0); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 3); + _assert_dict_elem(result, "first", "foo"); + _assert_dict_elem(result, "second", "bar"); + _assert_dict_elem(result, "third", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, named_grp_zero) +{ + FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", + FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO)); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 4); + _assert_dict_elem(result, "0", "foobarbaz"); + _assert_dict_elem(result, "first", "foo"); + _assert_dict_elem(result, "second", "bar"); + _assert_dict_elem(result, "third", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, named_grp_zero_list_mode) +{ + FilterXObject *result = _search("foobarbaz", "(?foo)(?bar)(?baz)", + FLAG_VAL(FILTERX_REGEXP_SEARCH_KEEP_GRP_ZERO) | FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(list))); + _assert_len(result, 4); + _assert_list_elem(result, 0, "foobarbaz"); + _assert_list_elem(result, 1, "foo"); + _assert_list_elem(result, 2, "bar"); + _assert_list_elem(result, 3, "baz"); + filterx_object_unref(result); 
+} + +Test(filterx_expr_regexp_search, mixed) +{ + FilterXObject *result = _search("foobarbaz", "(?foo)(bar)(?baz)", 0); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 3); + _assert_dict_elem(result, "first", "foo"); + _assert_dict_elem(result, "2", "bar"); + _assert_dict_elem(result, "third", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, forced_list) +{ + FilterXObject *result = filterx_test_list_new(); + _search_with_fillable("foobarbaz", "(?foo)(bar)(?baz)", result, 0); + _assert_len(result, 3); + _assert_list_elem(result, 0, "foo"); + _assert_list_elem(result, 1, "bar"); + _assert_list_elem(result, 2, "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, forced_dict) +{ + FilterXObject *result = filterx_test_dict_new(); + _search_with_fillable("foobarbaz", "(foo)(bar)(baz)", result, 0); + _assert_len(result, 3); + _assert_dict_elem(result, "1", "foo"); + _assert_dict_elem(result, "2", "bar"); + _assert_dict_elem(result, "3", "baz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, forced_dict_list_mode) +{ + // list mode overrides the default dict container creation, but still returns dict when fillable type is forced + FilterXObject *result = filterx_test_dict_new(); + _search_with_fillable("foobarbaz", "(foo)(bar)(baz)", result, FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); + _assert_len(result, 3); + _assert_dict_elem(result, "1", "foo"); + _assert_dict_elem(result, "2", "bar"); + _assert_dict_elem(result, "3", "baz"); + filterx_object_unref(result); +} + + +Test(filterx_expr_regexp_search, unnamed_no_match) +{ + FilterXObject *result = _search("foobarbaz", "(almafa)", 0); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 0); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, named_no_match) +{ + FilterXObject *result = _search("foobarbaz", "(?almafa)", 0); + 
cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 0); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, retain_group_zero_if_sole) +{ + FilterXObject *result = _search("foobarbaz", "foobarbaz", 0); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(dict))); + _assert_len(result, 1); + _assert_dict_elem(result, "0", "foobarbaz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, retain_group_zero_if_sole_list_mode) +{ + FilterXObject *result = _search("foobarbaz", "foobarbaz", FLAG_VAL(FILTERX_REGEXP_SEARCH_LIST_MODE)); + cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(list))); + _assert_len(result, 1); + _assert_list_elem(result, 0, "foobarbaz"); + filterx_object_unref(result); +} + +Test(filterx_expr_regexp_search, init_error) +{ + _assert_search_init_error("foobarbaz", "("); +} + +static void +setup(void) +{ + app_startup(); + init_libtest_filterx(); +} + +static void +teardown(void) +{ + scratch_buffers_explicit_gc(); + deinit_libtest_filterx(); + app_shutdown(); +} + +TestSuite(filterx_expr_regexp_search, .init = setup, .fini = teardown); diff --git a/lib/filterx/tests/test_expr_regexp_subst.c b/lib/filterx/tests/test_expr_regexp_subst.c new file mode 100644 index 000000000..3270cfadd --- /dev/null +++ b/lib/filterx/tests/test_expr_regexp_subst.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2023 Axoflow + * Copyright (c) 2024 shifter + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#include <criterion/criterion.h>
+#include "libtest/filterx-lib.h"
+
+#include "filterx/expr-regexp.h"
+#include "filterx/expr-regexp-subst.h"
+#include "filterx/expr-literal.h"
+#include "filterx/object-string.h"
+#include "filterx/object-primitive.h"
+#include "filterx/object-dict-interface.h"
+#include "filterx/object-list-interface.h"
+#include "filterx/filterx-object-istype.h"
+#include "apphook.h"
+#include "scratch-buffers.h"
+#include "compat/pcre.h"
+
+/*
+ * Test-side description of the optional flags handed to regexp_subst().
+ *
+ * A zero-initialized instance passes no optional flag except an explicit
+ * jit=false, see _build_subst_func().
+ */
+typedef struct FilterXFuncRegexpSubstOpts_
+{
+  gboolean global;      /* TRUE: pass the "global" flag as true */
+  gboolean jit;         /* FALSE: pass the "jit" flag as false; TRUE: do not pass the flag at all */
+  gboolean utf8;        /* TRUE: pass the "utf8" flag as true */
+  gboolean ignorecase;  /* TRUE: pass the "ignorecase" flag as true */
+  gboolean newline;     /* TRUE: pass the "newline" flag as true */
+  gboolean groups;      /* TRUE: pass the "groups" flag as true */
+} FilterXFuncRegexpSubstOpts;
+
+/* Appends the named boolean literal argument `name=value` to `args` and returns the new list head. */
+static GList *
+_append_bool_flag(GList *args, const gchar *name, gboolean value)
+{
+  return g_list_append(args, filterx_function_arg_new(name, filterx_literal_new(filterx_boolean_new(value))));
+}
+
+/*
+ * Builds a regexp_subst() function expression.
+ *
+ * Positional arguments are passed in the order: subject string (non-literal),
+ * pattern (literal), replacement (literal); the optional flags follow as
+ * named arguments according to `opts`.
+ *
+ * Fails the running test if the constructor reports an error.  The caller
+ * releases the returned expression with filterx_expr_unref().
+ */
+static FilterXExpr *
+_build_subst_func(const gchar *pattern, const gchar *repr, const gchar *str, FilterXFuncRegexpSubstOpts opts)
+{
+  GList *args = NULL;
+
+  args = g_list_append(args, filterx_function_arg_new(NULL, filterx_non_literal_new(filterx_string_new(str, -1))));
+  args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(pattern, -1))));
+  args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new(repr, -1))));
+
+  if (opts.global)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME, TRUE);
+  /* jit is the only flag that is passed when the option is *unset*: the tests run without JIT by default */
+  if (!opts.jit)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_JIT_NAME, FALSE);
+  if (opts.ignorecase)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME, TRUE);
+  if (opts.newline)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME, TRUE);
+  if (opts.utf8)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, TRUE);
+  if (opts.groups)
+    args = _append_bool_flag(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME, TRUE);
+
+  GError *err = NULL;
+  FilterXExpr *func = filterx_function_regexp_subst_new(filterx_function_args_new(args, NULL), &err);
+  cr_assert_null(err);
+  return func;
+}
+
+/*
+ * Builds and evaluates regexp_subst(str, pattern, repr, ...opts).
+ *
+ * Returns the evaluation result; the caller releases it with
+ * filterx_object_unref().
+ */
+static FilterXObject *
+_sub(const gchar *pattern, const gchar *repr, const gchar *str, FilterXFuncRegexpSubstOpts opts)
+{
+  FilterXExpr *func = _build_subst_func(pattern, repr, str, opts);
+
+  FilterXObject *res = filterx_expr_eval(func);
+  filterx_expr_unref(func);
+  return res;
+}
+
+/*
+ * Evaluates regexp_subst(str, pattern, repr, ...opts) and asserts that the
+ * result is a string object whose value equals `expected`.
+ *
+ * The assertion messages carry the inputs, as a failure is reported at this
+ * helper and not at the calling test.
+ */
+static void
+_assert_subst(const gchar *pattern, const gchar *repr, const gchar *str, FilterXFuncRegexpSubstOpts opts,
+              const gchar *expected)
+{
+  FilterXObject *result = _sub(pattern, repr, str, opts);
+
+  /* report a failed evaluation as a test failure instead of dereferencing NULL below */
+  cr_assert_not_null(result, "regexp_subst() evaluation failed, pattern: %s, repr: %s, input: %s",
+                     pattern, repr, str);
+  cr_assert(filterx_object_is_type(result, &FILTERX_TYPE_NAME(string)),
+            "regexp_subst() result is not a string, pattern: %s, repr: %s, input: %s", pattern, repr, str);
+
+  const gchar *res = filterx_string_get_value_ref(result, NULL);
+  cr_assert_str_eq(res, expected, "pattern: %s, repr: %s, input: %s, expected: %s, actual: %s",
+                   pattern, repr, str, expected, res);
+
+  filterx_object_unref(result);
+}
+
+/*
+ * JIT is disabled by default in these tests (see FilterXFuncRegexpSubstOpts.jit),
+ * since the JIT compiler confuses valgrind in some cases.
+ * In some test cases we test JIT against non-JIT, those tests will produce
+ * invalid reads in valgrind.
+ * Further info:
+ * https://stackoverflow.com/questions/74777619/valgrind-conditional-jump-error-with-pcre2-jit-when-reading-from-file
+ */
+
+Test(filterx_expr_regexp_subst, regexp_subst_single_replace)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("oo", "X", "foobarbaz", opts, "fXbarbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_single_replace_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("oo", "X", "foobarbaz", opts, "fXbarbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_multi_replace)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("a", "X", "foobarbaz", opts, "foobXrbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_multi_replace_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("a", "X", "foobarbaz", opts, "foobXrbXz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_zero_length_matches)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("u*", "X", "foobarbaz", opts, "XfoobarbazX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_zero_length_matches_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("u*", "X", "foobarbaz", opts, "XfXoXoXbXaXrXbXaXzX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_zero_length_matches_with_char_matches)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("a*", "X", "foobarbaz", opts, "XfoobarbazX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_zero_length_matches_with_char_matches_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE, .jit = FALSE};
+  _assert_subst("a*", "X", "foobarbaz", opts, "XfXoXoXbXXrXbXXzX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_at_beginning)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("fo", "X", "foobarbaz", opts, "Xobarbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_at_beginning_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("fo", "X", "foobarbaz", opts, "Xobarbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_at_the_end)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("az", "X", "foobarbaz", opts, "foobarbX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_at_the_end_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("az", "X", "foobarbaz", opts, "foobarbX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_multi_replace_multi_pattern)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("(a|o)", "X", "foobarbaz", opts, "fXobarbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_multi_replace_multi_pattern_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("(a|o)", "X", "foobarbaz", opts, "fXXbXrbXz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_accept_end_literal)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("ba.$", "X", "foobarbaz", opts, "foobarX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_accept_end_literal_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("ba.$", "X", "foobarbaz", opts, "foobarX");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_accept_groups)
+{
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("(o)*(ba)", "X", "foobarbaz", opts, "fXrbaz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_accept_groups_with_global)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("(o)*(ba)", "X", "foobarbaz", opts, "fXrXz");
+}
+
+/* The jit=false argument must switch JIT off; leaving the argument out must keep JIT enabled. */
+Test(filterx_expr_regexp_subst, regexp_subst_nojit_arg)
+{
+  FilterXFuncRegexpSubstOpts opts = {.jit = TRUE};
+  FilterXExpr *func = _build_subst_func("o", "X", "foobarbaz", opts);
+  cr_assert_not_null(func);
+  cr_assert(filterx_regexp_subst_is_jit_enabled(func));
+  filterx_expr_unref(func);
+
+  FilterXFuncRegexpSubstOpts opts_nojit = {0};
+  FilterXExpr *func_nojit = _build_subst_func("o", "X", "foobarbaz", opts_nojit);
+  cr_assert_not_null(func_nojit);
+  cr_assert(!filterx_regexp_subst_is_jit_enabled(func_nojit));
+  filterx_expr_unref(func_nojit);
+}
+
+/* Case sensitivity with JIT explicitly disabled (opts.jit is left FALSE in both halves). */
+Test(filterx_expr_regexp_subst, regexp_subst_match_opt_ignorecase)
+{
+  /* without ignorecase the upper case pattern must not match */
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("(O|A)", "X", "foobarbaz", opts, "foobarbaz");
+
+  FilterXFuncRegexpSubstOpts opts_alt = {.ignorecase = TRUE, .global = TRUE};
+  _assert_subst("(O|A)", "X", "foobarbaz", opts_alt, "fXXbXrbXz");
+}
+
+/*
+ * Checks that the CASELESS option is applied when the jit=false argument is
+ * not passed (opts_alt.jit = TRUE).
+ *
+ * NOTE(review): despite the "_nojit" suffix, the ignorecase half of this test
+ * is the one that does NOT disable JIT, while regexp_subst_match_opt_ignorecase
+ * runs with jit=false.  The test names look swapped; kept unchanged here,
+ * confirm before renaming.
+ */
+Test(filterx_expr_regexp_subst, regexp_subst_match_opt_ignorecase_nojit)
+{
+  FilterXFuncRegexpSubstOpts opts = {.global = TRUE};
+  _assert_subst("(O|A)", "X", "foobarbaz", opts, "foobarbaz");
+
+  FilterXFuncRegexpSubstOpts opts_alt = {.ignorecase = TRUE, .global = TRUE, .jit = TRUE};
+  _assert_subst("(O|A)", "X", "foobarbaz", opts_alt, "fXXbXrbXz");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_group_subst)
+{
+  /* without the groups flag the back-references are copied verbatim */
+  FilterXFuncRegexpSubstOpts opts = {0};
+  _assert_subst("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts, "\\3-\\2-\\1");
+
+  /* with the groups flag they are resolved to the captured groups */
+  FilterXFuncRegexpSubstOpts opts_alt = {.groups = TRUE};
+  _assert_subst("(\\d{2})-(\\d{2})-(\\d{4})", "\\3-\\2-\\1", "25-02-2022", opts_alt, "2022-02-25");
+}
+
+Test(filterx_expr_regexp_subst, regexp_subst_group_subst_without_ref)
+{
+  FilterXFuncRegexpSubstOpts opts = {.groups = TRUE};
+  _assert_subst("(\\d{2})-(\\d{2})-(\\d{4})", "group without ref", "25-02-2022", opts, "group without ref");
+}
+
+/* Suite fixture: runs before every test of the suite. */
+static void
+setup(void)
+{
+  app_startup();
+  init_libtest_filterx();
+}
+
+/* Suite fixture: runs after every test of the suite, undoing setup() in reverse order. */
+static void
+teardown(void)
+{
+  scratch_buffers_explicit_gc();
+  deinit_libtest_filterx();
+  app_shutdown();
+}
+
+TestSuite(filterx_expr_regexp_subst, .init = setup, .fini = teardown);