From 424d76520d2b45da5177bc53ae1ab697157844d5 Mon Sep 17 00:00:00 2001
From: shifter
Date: Fri, 31 May 2024 09:18:25 +0200
Subject: [PATCH] csvparser: add filterx-func-format-csv function

Signed-off-by: shifter
---
 modules/csvparser/CMakeLists.txt            |   2 +
 modules/csvparser/Makefile.am               |   4 +-
 modules/csvparser/csvparser-plugin.c        |   6 +
 modules/csvparser/filterx-func-format-csv.c | 326 ++++++++++++++++++++
 modules/csvparser/filterx-func-format-csv.h |  40 +++
 5 files changed, 377 insertions(+), 1 deletion(-)
 create mode 100644 modules/csvparser/filterx-func-format-csv.c
 create mode 100644 modules/csvparser/filterx-func-format-csv.h

diff --git a/modules/csvparser/CMakeLists.txt b/modules/csvparser/CMakeLists.txt
index e9b000d24b..a02a49bbfc 100644
--- a/modules/csvparser/CMakeLists.txt
+++ b/modules/csvparser/CMakeLists.txt
@@ -6,6 +6,8 @@ set(CSVPARSER_SOURCES
     csvparser-plugin.c
     filterx-func-parse-csv.h
     filterx-func-parse-csv.c
+    filterx-func-format-csv.h
+    filterx-func-format-csv.c
 )
 
 add_module(
diff --git a/modules/csvparser/Makefile.am b/modules/csvparser/Makefile.am
index 62d8e9c10d..0ace15a301 100644
--- a/modules/csvparser/Makefile.am
+++ b/modules/csvparser/Makefile.am
@@ -7,7 +7,9 @@ modules_csvparser_libcsvparser_la_SOURCES = \
   modules/csvparser/csvparser-parser.h \
   modules/csvparser/csvparser-plugin.c \
   modules/csvparser/filterx-func-parse-csv.h \
-  modules/csvparser/filterx-func-parse-csv.c
+  modules/csvparser/filterx-func-parse-csv.c \
+  modules/csvparser/filterx-func-format-csv.h \
+  modules/csvparser/filterx-func-format-csv.c
 
 modules_csvparser_libcsvparser_la_CPPFLAGS = \
   $(AM_CPPFLAGS) \
diff --git a/modules/csvparser/csvparser-plugin.c b/modules/csvparser/csvparser-plugin.c
index 5cbd7aee01..d3f777ed29 100644
--- a/modules/csvparser/csvparser-plugin.c
+++ b/modules/csvparser/csvparser-plugin.c
@@ -26,6 +26,7 @@
 #include "plugin.h"
 #include "plugin-types.h"
 #include "filterx-func-parse-csv.h"
+#include "filterx-func-format-csv.h"
 
 extern CfgParser csvparser_parser;
 
@@ -41,6 +42,11 @@ static Plugin csvparser_plugins[] =
     .name = "parse_csv",
     .construct = filterx_function_construct_parse_csv,
   },
+  {
+    .type = LL_CONTEXT_FILTERX_FUNC,
+    .name = "format_csv",
+    .construct = filterx_function_construct_format_csv,
+  },
 };
 
 gboolean
diff --git a/modules/csvparser/filterx-func-format-csv.c b/modules/csvparser/filterx-func-format-csv.c
new file mode 100644
index 0000000000..9498b47319
--- /dev/null
+++ b/modules/csvparser/filterx-func-format-csv.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2024 shifter
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#include "filterx-func-format-csv.h"
+#include "filterx/expr-literal.h"
+#include "filterx/object-string.h"
+#include "filterx/object-null.h"
+#include "filterx/object-dict-interface.h"
+#include "filterx/object-list-interface.h"
+#include "filterx/filterx-eval.h"
+
+#include "scratch-buffers.h"
+#include "utf8utils.h"
+
+#define FILTERX_FUNC_FORMAT_CSV_USAGE "Usage: format_csv(csv_dict or csv_array)"
+
+/* Per-instance state of a format_csv() call site. */
+typedef struct FilterXFunctionFormatCSV_
+{
+  FilterXFunction super;
+  FilterXExpr *input;      /* expression yielding the dict or list to format */
+  gchar delimiter;         /* single-character field separator, ',' by default */
+  FilterXExpr *columns;    /* optional "columns" argument, NULL when not given */
+} FilterXFunctionFormatCSV;
+
+/* State shared by the fields appended during one evaluation. */
+typedef struct FormatCSVState_
+{
+  FilterXFunctionFormatCSV *self;
+  GString *buffer;         /* output row being assembled */
+  gboolean has_fields;     /* TRUE once a field was emitted, even an empty one */
+} FormatCSVState;
+
+/*
+ * Append one field to the output row.
+ *
+ * The signature matches the callback passed to filterx_dict_iter(), so the
+ * same function serves dicts, lists and explicit column lists; user_data
+ * must point to a FormatCSVState. key is not used.
+ *
+ * Returns FALSE when value is NULL or its repr cannot be produced. Nested
+ * dicts and lists are skipped with a debug message and count as success.
+ */
+static gboolean
+_append_to_buffer(FilterXObject *key, FilterXObject *value, gpointer user_data)
+{
+  FormatCSVState *state = (FormatCSVState *) user_data;
+  FilterXFunctionFormatCSV *self = state->self;
+  GString *buffer = state->buffer;
+
+  if (!value)
+    return FALSE;
+
+  if (filterx_object_is_type(value, &FILTERX_TYPE_NAME(dict)) ||
+      filterx_object_is_type(value, &FILTERX_TYPE_NAME(list)))
+    {
+      msg_debug("FilterX: format_csv(): skipping object, type not supported",
+                evt_tag_str("type", value->type->name));
+      return TRUE;
+    }
+
+  /* An empty leading field leaves the buffer empty, so emitted fields are
+   * tracked explicitly instead of testing buffer->len. */
+  if (state->has_fields)
+    g_string_append_c(buffer, self->delimiter);
+
+  gsize len_before_value = buffer->len;
+  if (!filterx_object_repr_append(value, buffer))
+    return FALSE;
+  state->has_fields = TRUE;
+
+  /* TODO: make the characters here configurable. */
+  gsize value_len = buffer->len - len_before_value;
+  if (memchr(buffer->str + len_before_value, self->delimiter, value_len) != NULL)
+    {
+      ScratchBuffersMarker marker;
+      GString *value_buffer = scratch_buffers_alloc_and_mark(&marker);
+
+      /* Copy the raw value aside by length (it may contain NUL bytes), then
+       * re-append it between quotes with '"' passed as the unsafe character. */
+      g_string_truncate(value_buffer, 0);
+      g_string_append_len(value_buffer, buffer->str + len_before_value, value_len);
+      g_string_truncate(buffer, len_before_value);
+      g_string_append_c(buffer, '"');
+      append_unsafe_utf8_as_escaped_binary(buffer, value_buffer->str, value_buffer->len, "\"");
+      g_string_append_c(buffer, '"');
+
+      scratch_buffers_reclaim_marked(marker);
+    }
+
+  return TRUE;
+}
+
+/* Append every element of list as a field; FALSE as soon as one of them fails. */
+static gboolean
+_format_list(FilterXObject *list, FormatCSVState *state)
+{
+  guint64 size;
+  if (!filterx_object_len(list, &size))
+    return FALSE;
+
+  gboolean success = TRUE;
+  for (guint64 i = 0; i < size && success; i++)
+    {
+      FilterXObject *elt = filterx_list_get_subscript(list, i);
+      success = _append_to_buffer(NULL, elt, state);
+      filterx_object_unref(elt);
+    }
+
+  return success;
+}
+
+/*
+ * Append the values of dict selected by the "columns" expression, in the
+ * order the columns are listed. Pushes an eval error and returns FALSE when
+ * columns cannot be evaluated or is not a list; a column whose lookup yields
+ * no value also fails the call.
+ */
+static gboolean
+_format_dict_columns(FilterXFunctionFormatCSV *self, FilterXObject *dict, FormatCSVState *state)
+{
+  FilterXObject *cols = filterx_expr_eval(self->columns);
+  if (!cols)
+    {
+      filterx_eval_push_error("Failed to evaluate columns. " FILTERX_FUNC_FORMAT_CSV_USAGE,
+                              &self->super.super, NULL);
+      return FALSE;
+    }
+
+  gboolean success = FALSE;
+  guint64 size;
+  if (!filterx_object_is_type(cols, &FILTERX_TYPE_NAME(list)))
+    {
+      filterx_eval_push_error("columns must be a list. " FILTERX_FUNC_FORMAT_CSV_USAGE,
+                              &self->super.super, cols);
+      goto exit;
+    }
+
+  if (!filterx_object_len(cols, &size))
+    goto exit;
+
+  success = TRUE;
+  for (guint64 i = 0; i < size && success; i++)
+    {
+      FilterXObject *col = filterx_list_get_subscript(cols, i);
+      FilterXObject *elt = col ? filterx_object_get_subscript(dict, col) : NULL;
+      success = _append_to_buffer(col, elt, state);
+      filterx_object_unref(col);
+      filterx_object_unref(elt);
+    }
+
+exit:
+  filterx_object_unref(cols);
+  return success;
+}
+
+/*
+ * Evaluate format_csv(): render the input list or dict as one CSV row.
+ *
+ * Returns a new string object, or NULL after a failure. An eval error is
+ * pushed when the input cannot be evaluated or is neither a dict nor a list.
+ */
+static FilterXObject *
+_eval(FilterXExpr *s)
+{
+  FilterXFunctionFormatCSV *self = (FilterXFunctionFormatCSV *) s;
+
+  FilterXObject *csv_data = filterx_expr_eval_typed(self->input);
+  if (!csv_data)
+    {
+      filterx_eval_push_error("Failed to evaluate input. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, NULL);
+      return NULL;
+    }
+
+  FormatCSVState state = { .self = self, .buffer = scratch_buffers_alloc(), .has_fields = FALSE };
+  gboolean success = FALSE;
+
+  if (filterx_object_is_type(csv_data, &FILTERX_TYPE_NAME(list)))
+    success = _format_list(csv_data, &state);
+  else if (!filterx_object_is_type(csv_data, &FILTERX_TYPE_NAME(dict)))
+    filterx_eval_push_error("input must be a dict or list. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, csv_data);
+  else if (self->columns)
+    success = _format_dict_columns(self, csv_data, &state);
+  else
+    success = filterx_dict_iter(csv_data, _append_to_buffer, &state);
+
+  /* single exit so the input reference is dropped on every path */
+  filterx_object_unref(csv_data);
+  return success ? filterx_string_new(state.buffer->str, state.buffer->len) : NULL;
+}
+
+/* Release the argument expressions held by the function instance. */
+static void
+_free(FilterXExpr *s)
+{
+  FilterXFunctionFormatCSV *self = (FilterXFunctionFormatCSV *) s;
+
+  filterx_expr_unref(self->input);
+  filterx_expr_unref(self->columns);
+  filterx_function_free_method(&self->super);
+}
+
+/* Fetch the optional "columns" named argument as an expression. */
+static FilterXExpr *
+_extract_columns_expr(FilterXFunctionArgs *args, GError **error)
+{
+  return filterx_function_args_get_named_expr(args, FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS);
+}
+
+/*
+ * Read the optional "delimiter" named argument into self->delimiter.
+ * The argument must be a string literal of length 1; when it is absent the
+ * current delimiter is kept and TRUE is returned.
+ */
+static gboolean
+_extract_delimiter_arg(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
+{
+  gboolean exists;
+  gsize delimiter_len;
+  const gchar *delimiter = filterx_function_args_get_named_literal_string(args,
+                           FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER,
+                           &delimiter_len, &exists);
+  if (!exists)
+    return TRUE;
+
+  if (!delimiter)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "delimiter must be a string literal. " FILTERX_FUNC_FORMAT_CSV_USAGE);
+      return FALSE;
+    }
+
+  if (delimiter_len != 1)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "delimiter must be a single character. " FILTERX_FUNC_FORMAT_CSV_USAGE);
+      return FALSE;
+    }
+
+  self->delimiter = delimiter[0];
+  return TRUE;
+}
+
+/*
+ * Validate the call arguments and store them on self: the argument count
+ * must be exactly one (the input); delimiter and columns are optional.
+ */
+static gboolean
+_extract_arguments(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
+{
+  gsize args_len = filterx_function_args_len(args);
+  if (args_len != 1)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "invalid number of arguments. " FILTERX_FUNC_FORMAT_CSV_USAGE);
+      return FALSE;
+    }
+
+  self->input = filterx_function_args_get_expr(args, 0);
+  if (!self->input)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "input must be set. " FILTERX_FUNC_FORMAT_CSV_USAGE);
+      return FALSE;
+    }
+
+  if (!_extract_delimiter_arg(self, args, error))
+    return FALSE;
+
+  self->columns = _extract_columns_expr(args, error);
+
+  return TRUE;
+}
+
+/*
+ * Construct a format_csv() function expression from its call arguments.
+ * args is freed on both the success and the failure path. Returns NULL
+ * with error set when argument validation fails.
+ */
+FilterXFunction *
+filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error)
+{
+  FilterXFunctionFormatCSV *self = g_new0(FilterXFunctionFormatCSV, 1);
+  filterx_function_init_instance(&self->super, function_name);
+
+  self->super.super.eval = _eval;
+  self->super.super.free_fn = _free;
+  self->delimiter = ',';
+
+  if (!_extract_arguments(self, args, error))
+    goto error;
+
+  filterx_function_args_free(args);
+  return &self->super;
+
+error:
+  filterx_function_args_free(args);
+  filterx_expr_unref(&self->super.super);
+  return NULL;
+}
+
+/* Plugin construct hook: returns the format_csv() constructor as a gpointer. */
+gpointer
+filterx_function_construct_format_csv(Plugin *self)
+{
+  return (gpointer) filterx_function_format_csv_new;
+}
diff --git a/modules/csvparser/filterx-func-format-csv.h b/modules/csvparser/filterx-func-format-csv.h
new file mode 100644
index 0000000000..3b02890cdc
--- /dev/null
+++ b/modules/csvparser/filterx-func-format-csv.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2024 shifter
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#ifndef FILTERX_FUNC_FORMAT_CSV_H_INCLUDED
+#define FILTERX_FUNC_FORMAT_CSV_H_INCLUDED
+
+
+#include "plugin.h"
+#include "filterx/expr-function.h"
+
+/* Names of the optional named arguments of format_csv(). */
+#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS "columns"
+#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER "delimiter"
+
+/* Builds a format_csv() function expression; returns NULL and sets error on invalid arguments. */
+FilterXFunction *filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error);
+
+/* Plugin construct hook: returns filterx_function_format_csv_new as a gpointer. */
+gpointer filterx_function_construct_format_csv(Plugin *self);
+
+#endif